In [1]:
import pandas as pd
import os
import mysql.connector
from mysql.connector import Error
from sqlalchemy import create_engine

The following cell takes in all of the data files provided and combines them into 1 master file 

In [3]:
# combine All Files into 1 
rootDir = '../data'
master_fname= 'Master_Data_File.csv'
file_dict={}
years= []

for dirName, subdirList, fileList in os.walk(rootDir):
    if (dirName == rootDir):
        print('In directory: %s' % dirName)
        for fname in fileList:   
            split = fname.split('_')
            if (len(split)>3):
                print('\t Loading  '+fname)
                years.append(split[2])
                file_dict[split[2]]= fname
        
years.sort()
years.reverse()

data_files = []
for year in years: 
    file_path = os.path.join(rootDir, file_dict[year])
    tempdata = pd.read_csv(file_path, skiprows=1)
    data_files.append(tempdata)
    
master_data_file = pd.concat(data_files,ignore_index=True)
master_data_file.to_csv(master_fname, index=False)

print('done')
master_data_file


In directory: ../data
	 Loading  gemini_BTCUSD_2015_1min.csv
	 Loading  gemini_BTCUSD_2016_1min.csv
	 Loading  gemini_BTCUSD_2017_1min.csv
	 Loading  gemini_BTCUSD_2018_1min.csv
	 Loading  gemini_BTCUSD_2019_1min.csv
	 Loading  gemini_BTCUSD_2020_1min.csv
	 Loading  gemini_BTCUSD_2021_1min.csv
done


Unnamed: 0,Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume
0,1634429100000,2021-10-17 00:05:00,BTCUSD,60716.28,60716.28,60712.32,60715.55,0.022586
1,1634429040000,2021-10-17 00:04:00,BTCUSD,60809.75,60829.33,60716.28,60716.28,0.216773
2,1634428980000,2021-10-17 00:03:00,BTCUSD,60922.25,60925.99,60809.75,60809.75,0.108527
3,1634428920000,2021-10-17 00:02:00,BTCUSD,60875.98,60937.30,60854.84,60922.25,1.135391
4,1634428860000,2021-10-17 00:01:00,BTCUSD,60877.53,60877.53,60851.85,60875.98,0.182190
...,...,...,...,...,...,...,...,...
3063873,1444311840,2015-10-08 13:44:00,BTCUSD,242.96,242.96,242.96,242.96,0.033491
3063874,1444311780,2015-10-08 13:43:00,BTCUSD,242.95,242.96,242.95,242.96,0.010000
3063875,1444311720,2015-10-08 13:42:00,BTCUSD,242.95,242.95,242.95,242.95,0.000000
3063876,1444311660,2015-10-08 13:41:00,BTCUSD,242.50,242.95,242.50,242.95,0.001000


The following Cell takes the master data frame and reverses the order

In [4]:
data_frame = master_data_file.sort_index(axis=0 ,ascending=False)
data_frame = data_frame.reset_index(drop=True)
data_frame

Unnamed: 0,Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume
0,1444311600,2015-10-08 13:40:00,BTCUSD,0.00,242.50,0.00,242.50,0.050000
1,1444311660,2015-10-08 13:41:00,BTCUSD,242.50,242.95,242.50,242.95,0.001000
2,1444311720,2015-10-08 13:42:00,BTCUSD,242.95,242.95,242.95,242.95,0.000000
3,1444311780,2015-10-08 13:43:00,BTCUSD,242.95,242.96,242.95,242.96,0.010000
4,1444311840,2015-10-08 13:44:00,BTCUSD,242.96,242.96,242.96,242.96,0.033491
...,...,...,...,...,...,...,...,...
3063873,1634428860000,2021-10-17 00:01:00,BTCUSD,60877.53,60877.53,60851.85,60875.98,0.182190
3063874,1634428920000,2021-10-17 00:02:00,BTCUSD,60875.98,60937.30,60854.84,60922.25,1.135391
3063875,1634428980000,2021-10-17 00:03:00,BTCUSD,60922.25,60925.99,60809.75,60809.75,0.108527
3063876,1634429040000,2021-10-17 00:04:00,BTCUSD,60809.75,60829.33,60716.28,60716.28,0.216773


The following cell contains the function that grabs the environment variables 

In [5]:
# for getting ENv vars 
env_vars = {} # or dict {}
with open('env.txt') as f:
    for line in f:
        if line.startswith('#') or not line.strip():
            continue
        key, value = line.strip().split('=', 1)
        env_vars[key]= value
print(env_vars)

{'HOST': '64.20.33.250', 'DB': 'st19945_tickerData', 'USER': 'st19945_tickerData', 'PASSWORD': 'CU2021'}


Run the following cell to ensure connection can be established

In [6]:
try:
    connection = mysql.connector.connect(host=env_vars['HOST'],
                                         database=env_vars['DB'],
                                         user=env_vars['USER'],
                                         password=env_vars['PASSWORD'])
    if connection.is_connected():
        db_Info = connection.get_server_info()
        print("Connected to MySQL Server version ", db_Info)
        cursor = connection.cursor()
        cursor.execute("select database();")
        record = cursor.fetchone()
        print("You're connected to database: ", record)

except Error as e:
    print("Error while connecting to MySQL", e)
finally:
    if connection.is_connected():
        cursor.close()
        connection.close()
        print("MySQL connection is closed")

Connected to MySQL Server version  5.5.5-10.4.21-MariaDB
You're connected to database:  ('st19945_tickerData',)
MySQL connection is closed


The following Cell sends the Master data file to SQL database

In [7]:
# create sqlalchemy engine
engine = create_engine("mysql+pymysql://{user}:{pw}@{host}/{db}"
                       .format(user=env_vars['USER'],
                               pw=env_vars['PASSWORD'],
                               host=env_vars['HOST'],
                               db=env_vars['DB']))

In [None]:
################################################
# Do not run unless you are pushing the whole new data to DB 
################################################

data_frame.to_sql('BTC_Ticker_Data', con = engine, if_exists = 'append', chunksize = 1000)