In [93]:
import pandas as pd
import json
from urllib.request import Request, urlopen
import re
import datetime
from datetime import datetime as dt
from bs4 import BeautifulSoup as soup

In [29]:
#ICO Watch List API Wrapper- Can be called with 'live', 'upcoming', and 'finished' on the end for their respective lists, otherwise calls all ICO's
#No API key needed, 1sec limit per call

class ICO_data():
    
    # Initiates the object (self) and allows variables to be set for use in any of the classes funtions
    def __init__(self):
        self.url =' https://api.icowatchlist.com/public/v1/'
        
    def get_json(self):
        '''
        Sends HTTP Request to provided url and returns a json (dictionary) object.

        Arguements: 'url' - Requires a full http address including any applicable API keys.
        '''
        request = Request(self.url, headers={'User-Agent': 'Python'})
        response = urlopen(request)
        raw_data = response.read()
        json_data = json.loads(raw_data)
        return json_data

    def get_ico_df(self):
        '''
        Performs the 'get_json()' funtion and converts it into a Pandas DataFrame
        '''
        json_data = self.get_json()
        ico_list = json_data['ico']['finished']
        df = pd.DataFrame(ico_list)
        return df


     
    
    def preprocess_data(self):
        '''
        Performs the 'get_df' function and removes erronus columns, converts time to DateTime objects and 
        converts the numbers to floats
        '''
        df = self.get_ico_df()
        
        df.drop(columns=['icowatchlist_url', 'image', 'website_link'], inplace = True)
        reordered_columns = ['Name', 'Description', 'Price(USD)', 'Start', 'End', 'ROI(Pct)', 'Timezone']
        df.rename(columns={'all_time_roi': 'ROI(Pct)',
                           'coin_symbol': 'Ticker',
                          'description': 'Description',
                           'end_time': 'End',
                           'name': 'Name',
                           'price_usd': 'Price(USD)',
                          'start_time': 'Start',
                           'timezone': 'Timezone',
                          }, inplace=True)
        df.set_index('Ticker', inplace = True)
        df=df.reindex(columns=reordered_columns)
        #Convert the strings to datetime objects
        df['Start'] = df['Start'].apply(lambda x: dt.strptime(x, "%Y-%m-%d %H:%M:%S"))
        df['End'] = df['End'].apply(lambda x: dt.strptime(x, "%Y-%m-%d %H:%M:%S"))
        df['Price(USD)'] = df['Price(USD)'].replace("NA",'0')
        df['ROI(Pct)'] = df['ROI(Pct)'].replace('NA','0%')
        #Split the price values that are over 1,000 at the ','
        df['Price(USD)'] = df['Price(USD)'].apply(lambda x: re.split(',', x))
        #Split the roi values at their ',', and '%'
        df['ROI(Pct)'] = df['ROI(Pct)'].apply(lambda x: re.split('[, %]', x))
        #Merge the strings back that are over 1,000 and turn them into floats
        try:
            df['Price(USD)'] = df['Price(USD)'].apply(lambda x: float(x[0] + x[1])) 
        #Convert the remaining strings back to floats    
        except:
            df['Price(USD)'] = df['Price(USD)'].apply(lambda x: float(x[0]))
        
        try:
            df['ROI(Pct)'] = df['ROI(Pct)'].apply(lambda x: float(x[0] + x[1])/100) 
        except:
            df['ROI(Pct)'] = df['ROI(Pct)'].apply(lambda x: float(x[0])/100)
        #Create a duration column from the start and end dates
        df["Duration"] = df['End'] - df['Start']
        return df

  

In [371]:
  
def check_cmc(project_name):
    '''
    Check Coin Market Cap for the project, if it exists capture the current price, market cap, volume, circulating supply, total supply, ATH, ATL
    '''
    cmc_base_url = 'https://coinmarketcap.com/currencies/'
    headers={'User-Agent':"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"} 
    request=Request(cmc_base_url + project_name, headers=headers) 
    response =urlopen(request)
    soup = BeautifulSoup(response, 'html.parser')
        
        

In [30]:
#Instantiate the boject
ico = ICO_data()

In [31]:
clean_df = ico.preprocess_data()

In [32]:
clean_df.head()

Unnamed: 0_level_0,Name,Description,Price(USD),Start,End,ROI(Pct),Timezone,Duration
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
REP,Augur,Augur is a decentralized prediction market,12.33,2015-08-17 00:00:00,2015-10-01 00:00:00,20.2598,UTC+0,45 days
LSK,Lisk,It is a cryptocurrency and decentralized appli...,0.77,2016-02-22 00:00:00,2016-03-21 00:00:00,9.3439,UTC+0,28 days
DGD,Digix DAO,A Decentralized Autonomous Organization focuse...,12.55,2016-03-30 12:00:00,2016-03-30 12:00:00,2.8735,UTC+0,0 days
WAVES,Waves,Waves helps to make the launching and coordina...,0.79,2016-04-12 13:00:00,2016-05-31 13:00:00,3.1905,UTC+0,49 days
STRAT,Stratis,Stratis was developed to help organisations de...,0.32,2016-06-21 00:00:00,2016-07-26 00:00:00,42.3561,UTC+0,35 days


In [372]:
x = check_cmc('Augur')

In [590]:
type(x)

str

In [132]:
clean_df['price_usd'] = clean_df['price_usd'].apply(lambda x: try: float(x[0] + x[1]))                 

SyntaxError: invalid syntax (<ipython-input-132-c95f2be54a51>, line 1)

In [163]:
clean_df

Unnamed: 0_level_0,all_time_roi,description,end_time,name,price_usd,start_time,timezone
coin_symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
REP,2025.98,Augur is a decentralized prediction market,2015-10-01 00:00:00,Augur,12.33,2015-08-17 00:00:00,UTC+0
LSK,934.39,It is a cryptocurrency and decentralized appli...,2016-03-21 00:00:00,Lisk,0.77,2016-02-22 00:00:00,UTC+0
DGD,287.35,A Decentralized Autonomous Organization focuse...,2016-03-30 12:00:00,Digix DAO,12.55,2016-03-30 12:00:00,UTC+0
WAVES,319.05,Waves helps to make the launching and coordina...,2016-05-31 13:00:00,Waves,0.79,2016-04-12 13:00:00,UTC+0
STRAT,4235.61,Stratis was developed to help organisations de...,2016-07-26 00:00:00,Stratis,0.32,2016-06-21 00:00:00,UTC+0
XTO,187.10,Tao Network posits itself as a digital asset m...,2016-08-15 00:00:00,Tao Network,0.03,2016-07-16 00:00:00,UTC+0
INCNT,236.17,Incent is a blockchain based loyalty program f...,2016-09-01 00:00:00,Incent,0.16,2016-09-01 00:00:00,UTC+0
NEO,6765.79,NEO is a China based smart contract Blockchain...,2016-09-07 00:00:00,NEO,10.92,2016-08-08 00:00:00,UTC+0
BLOCKPAY,-49.58,This is a retail platform that allows multicry...,2016-09-15 00:00:00,BlockPay,0.06,2016-08-15 00:00:00,UTC+0
1ST,69.72,FirstBlood is a eSports matches and betting pr...,2016-09-26 12:00:00,FirstBlood,0.12,2016-09-25 00:00:00,UTC+0


In [607]:
class Coin_data():
    
    def __init__(self):
        self.btctalk_ann_url = 'https://bitcointalk.org/index.php?board=159.0'
        self.cmc_base_url = 'https://coinmarketcap.com/currencies/'
        self.cmc_coin_url = 'https://coinmarketcap.com/all/views/all/'

    def get_cmc_coins(self):
        headers={'User-Agent':"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"} 
        request=Request(self.cmc_coin_url, headers=headers) 
        response =urlopen(request)
        soup = BeautifulSoup(response, 'html.parser')
        coin_list = soup.findAll('a' , {'class':"currency-name-container link-secondary"})
        coins = []
        for coin in coin_list:
            coins.append(coin.text)
        return coins
        
        
    def get_new_projects(self):
        #Get list of coins on Coin Market Cap
        coin_list = self.get_cmc_coins()
        
        #Prepare BS4 to scrape bitcointalk.org announcement page
        headers={'User-Agent':"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"} 
        request=Request(self.btctalk_ann_url, headers=headers) 
        response =urlopen(request)
        soup = BeautifulSoup(response, 'html.parser')
        
        #Create a list of all the post on the announcement page
        links = soup.findAll('a')
        
        #Capture the url for each post
        links = [url.get('href') for url in links if 'ANN' in url.text]
        
        #For loop over each url saving the content of each page to a dict key
        print('Looping over each url saving the content of each page to a dict key')
        coins = {}
        count = 0
        for url in links:
            count += 1
            headers={'User-Agent':"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"} 
            request=Request(url, headers=headers) 
            response =urlopen(request)
            soup = BeautifulSoup(response, 'html.parser')
            coins[count] = soup
            
        #For loop over each dict key (post html page) and captive the title, as well as search the body for key words
        print('Looping over each dict key (post html page) and captive the title, as well as search the body for key words')
        count = 1
        flags = ['gaurenteed', 'profit', 'government', 'approval', 'massive', 'mega', 'rich', 'money']
        name, rating = [],[]
        for i in range(len(coins)):
            count += 1
            scam_meter = 0

            for flag in flags:
                if flag in coins[i+1].text.lower():
                    scam_meter += 1
            name += [coins[i+1].title.text]
            rating += [scam_meter]
            
        #Create a dataframe to store the title and scam rating for each post
        df = pd.DataFrame({
            'Title':name,
            'Scam_Rating':rating,
        }) 
        
        #Extract the project name from the title
        
        
        #Extract the ticker from the title
        
        
        #Extract a start date from the body of each post
        
        
        #Extract an end date from the body of each post
        
        
        #Capture the offering price for each coin
        
        
        #Identify the Algo for each project 
        
        
        return df

In [609]:
coin = Coin_data()

In [610]:
coin_list = coin.get_cmc_coins()

In [611]:
df = coin.get_new_projects()


['Bitcoin',
 'Ethereum',
 'XRP',
 'Bitcoin Cash',
 'Tether',
 'Litecoin',
 'EOS',
 'Binance Coin',
 'Bitcoin SV',
 'Stellar',
 'TRON',
 'Cardano',
 'Monero',
 'UNUS SED LEO',
 'Chainlink',
 'Huobi Token',
 'Tezos',
 'Cosmos',
 'NEO',
 'IOTA',
 'Maker',
 'Dash',
 'Ethereum Classic',
 'Ontology',
 'USD Coin',
 'Crypto.com Coin',
 'NEM',
 'Dogecoin',
 'Basic Attenti...',
 'VeChain',
 'Zcash',
 'Paxos Standard',
 'Decred',
 'HedgeTrade',
 'Qtum',
 '0x',
 'TrueUSD',
 'Holo',
 'Bitcoin Gold',
 'V Systems',
 'Centrality',
 'Ravencoin',
 'OmiseGO',
 'Nano',
 'ZB',
 'ABBC Coin',
 'Augur',
 'Bytom',
 'LUNA',
 'Algorand',
 'EDUCare',
 'Synthetix Net...',
 'DxChain Token',
 'KuCoin Shares',
 'Dai',
 'Bitcoin Diamond',
 'Silverway',
 'Komodo',
 'Lisk',
 'BitTorrent',
 'DigiByte',
 'Siacoin',
 'HyperCash',
 'ICON',
 'Karatgold Coin',
 'Swipe',
 'Waves',
 'Quant',
 'THETA',
 'BitShares',
 'Bytecoin',
 'IOST',
 'FTX Token',
 'MonaCoin',
 'MCO',
 'Aeternity',
 'Aurora',
 'Verge',
 'Seele',
 'MaidSafeCo

In [603]:
coin_list[1].text

'Ethereum'

In [612]:
df['Title'][6]   

KeyError: 'Title'