## Import Libraries

In [1]:
# import libraries
import pandas as pd
from pymongo import MongoClient
import binance

import sys
sys.path.append(r'C:\Users\Besitzer\Desktop\projects\Github_Repositories\cfa_binance\docs')
import config

# Define Functions

In [2]:
# Create Client
client = Client(config.APIKey, config.SecretKey)

# Define Assets
symbol01 = 'BTCUSDT'
symbol02 = 'ETHBTC'
symbol03 = 'ETHUSDT'


# Define Columns
columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume',
            'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore']

# Columns change to float
cols_float = ['open', 'high', 'low', 'close', 'volume', 'quote_asset_volume',
              'taker_buy_base_asset_volume','taker_buy_quote_asset_volume', 'ignore']


def get_klines(symbol, interval, start_date):
        klines = client.get_historical_klines(symbol, interval, start_date)
        return klines

def create_dataframe(klines, columns, id_):
        df = pd.DataFrame(klines, columns=columns)
        df.insert(0, 'id_asset', id_)
        return df


def change_col_type(df, columns, typ):
    for column in columns:
        df[columns] = df[columns].astype(typ)
    
def change_to_datetime(df, *columns):
    for column in columns:
        df[column] = pd.to_datetime(df[column], unit='ms')

# Hourly Data   

In [3]:
# Hourly Data

interval = Client.KLINE_INTERVAL_1HOUR
start_date = "2017-01-01"

klines01 = get_klines(symbol01, interval, start_date)
klines02 = get_klines(symbol02, interval, start_date)
klines03 = get_klines(symbol03, interval, start_date)

df_hourly_01 = create_dataframe(klines01, columns, 1)
df_hourly_02 = create_dataframe(klines02, columns, 2)
df_hourly_03 = create_dataframe(klines03, columns, 3)

df_hourly = pd.concat([df_hourly_01, df_hourly_02, df_hourly_03]).sort_values(['timestamp', 'id_asset']).reset_index(drop=True)

# Change Type
change_col_type(df_hourly, cols_float, float)
change_col_type(df_hourly, 'number_of_trades', int)
change_to_datetime(df_hourly, 'timestamp', 'close_time')

# Show DataFrame
df_hourly.tail()

Unnamed: 0,id_asset,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
155167,2,2023-07-06 06:00:00,0.06283,0.06285,0.06265,0.06268,485.9004,2023-07-06 06:59:59.999,30.49612,852,273.658,17.17466,0.0
155168,3,2023-07-06 06:00:00,1921.79,1927.0,1919.78,1923.44,10612.8461,2023-07-06 06:59:59.999,20407610.0,14538,4946.1566,9512330.0,0.0
155169,1,2023-07-06 07:00:00,30692.57,30780.0,30688.21,30780.0,981.93416,2023-07-06 07:59:59.999,30175470.0,18972,544.48011,16733300.0,0.0
155170,2,2023-07-06 07:00:00,0.06268,0.0627,0.06256,0.06257,216.9308,2023-07-06 07:59:59.999,13.58286,677,95.2936,5.967535,0.0
155171,3,2023-07-06 07:00:00,1923.43,1926.19,1923.11,1925.28,5482.4189,2023-07-06 07:59:59.999,10549480.0,5987,2801.5108,5391130.0,0.0


# Daily Data

In [4]:
# Daily Data

interval = Client.KLINE_INTERVAL_1DAY
start_date = "2017-01-01"

klines01 = get_klines(symbol01, interval, start_date)
klines02 = get_klines(symbol02, interval, start_date)
klines03 = get_klines(symbol03, interval, start_date)

df_daily_01 = create_dataframe(klines01, columns, 1)
df_daily_02 = create_dataframe(klines02, columns, 2)
df_daily_03 = create_dataframe(klines03, columns, 3)

df_daily = pd.concat([df_daily_01, df_daily_02, df_daily_03]).sort_values(['timestamp', 'id_asset']).reset_index(drop=True)

# Change Type
change_col_type(df_daily, cols_float, float)
change_col_type(df_daily, 'number_of_trades', int)
change_to_datetime(df_daily, 'timestamp', 'close_time')

# Show DataFrame
df_daily.tail()

Unnamed: 0,id_asset,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
6479,2,2023-07-05,0.06292,0.06299,0.06249,0.06262,13514.8029,2023-07-05 23:59:59.999,848.255,29896,6411.8645,402.4303,0.0
6480,3,2023-07-05,1936.19,1942.5,1894.51,1910.36,267633.4783,2023-07-05 23:59:59.999,512774100.0,362985,128057.0596,245340700.0,0.0
6481,1,2023-07-06,30504.8,30787.42,30327.0,30786.57,7543.32988,2023-07-06 23:59:59.999,230530000.0,211108,3843.67783,117487200.0,0.0
6482,2,2023-07-06,0.06263,0.06286,0.06256,0.06257,2955.3768,2023-07-06 23:59:59.999,185.3236,6122,1623.3487,101.8002,0.0
6483,3,2023-07-06,1910.35,1927.0,1898.8,1926.32,67264.0389,2023-07-06 23:59:59.999,128772500.0,91640,35650.6576,68245010.0,0.0


# Weekly Data

In [5]:
# Weekly Data

interval = Client.KLINE_INTERVAL_1WEEK
start_date = "2017-01-01"

klines01 = get_klines(symbol01, interval, start_date)
klines02 = get_klines(symbol02, interval, start_date)
klines03 = get_klines(symbol03, interval, start_date)

df_week_01 = create_dataframe(klines01, columns, 1)
df_week_02 = create_dataframe(klines02, columns, 2)
df_week_03 = create_dataframe(klines03, columns, 3)

df_weekly = pd.concat([df_week_01, df_week_02, df_week_03]).sort_values(['timestamp', 'id_asset']).reset_index(drop=True)

# Change Type
change_col_type(df_weekly, cols_float, float)
change_col_type(df_weekly, 'number_of_trades', int)
change_to_datetime(df_weekly, 'timestamp', 'close_time')

# Show DataFrame
df_weekly.tail()

Unnamed: 0,id_asset,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
924,2,2023-06-26,0.06233,0.06369,0.060524,0.06329,190249.2,2023-07-02 23:59:59.999,11792.12,337587,92433.56,5725.751,0.0
925,3,2023-06-26,1898.8,1959.2,1816.56,1937.48,2555824.0,2023-07-02 23:59:59.999,4814136000.0,3256642,1262129.0,2378222000.0,0.0
926,1,2023-07-03,30617.02,31380.0,30200.0,30785.95,117729.4,2023-07-09 23:59:59.999,3625735000.0,2600430,54456.24,1676794000.0,0.0
927,2,2023-07-03,0.06329,0.06435,0.06212,0.06257,74491.7,2023-07-09 23:59:59.999,4703.977,133579,36601.38,2311.629,0.0
928,3,2023-07-03,1937.49,1976.16,1894.51,1925.95,862554.6,2023-07-09 23:59:59.999,1673950000.0,1166430,414236.7,803758300.0,0.0


# Monthly Data

In [6]:
# Monthly Data

interval = Client.KLINE_INTERVAL_1MONTH
start_date = "2017-01-01"

klines01 = get_klines(symbol01,interval, start_date)
klines02 = get_klines(symbol02,interval, start_date)
klines03 = get_klines(symbol03,interval, start_date)

df_monthly_01 = create_dataframe(klines01, columns, 1)
df_monthly_02 = create_dataframe(klines02, columns, 2)
df_monthly_03 = create_dataframe(klines03, columns, 3)

df_monthly = pd.concat([df_monthly_01, df_monthly_02, df_monthly_03]).sort_values(['timestamp', 'id_asset']).reset_index(drop=True)

# Change Type
change_col_type(df_monthly, cols_float, float)
change_col_type(df_monthly, 'number_of_trades', int)
change_to_datetime(df_monthly, 'timestamp', 'close_time')

# Show DataFrame
df_monthly.tail()

Unnamed: 0,id_asset,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
212,2,2023-06-01,0.06886,0.07088,0.060524,0.06344,848720.4,2023-06-30 23:59:59.999,55395.03,1601905,404361.8,26414.59,0.0
213,3,2023-06-01,1873.63,1948.6,1626.01,1933.79,10385950.0,2023-06-30 23:59:59.999,18913800000.0,14361542,5088607.0,9265203000.0,0.0
214,1,2023-07-01,30471.99,31380.0,30155.0,30778.23,158522.8,2023-07-31 23:59:59.999,4870787000.0,3760064,73189.68,2248618000.0,0.0
215,2,2023-07-01,0.06345,0.06435,0.06212,0.06257,117615.7,2023-07-31 23:59:59.999,7421.152,212547,56572.23,3569.581,0.0
216,3,2023-07-01,1933.8,1976.16,1885.75,1925.75,1296791.0,2023-07-31 23:59:59.999,2509045000.0,1854592,629873.2,1218530000.0,0.0


# Create Asset Table 

In [7]:
df_assets = pd.DataFrame({'id_asset' : [1,2,3],
                         'name' : ['BTCUSDT', 'ETHBTC', 'ETHUSDT']})

df_assets

Unnamed: 0,id_asset,name
0,1,BTCUSDT
1,2,ETHBTC
2,3,ETHUSDT


## Save data to csv

In [9]:
path = '/Users/macbook/Desktop/projects/Github_Repositories/cfa_binance/data/raw'
df_hourly.to_csv(f"{path}/All_2017_01_01_to_2023_07_06_hourly.csv", index=False)
df_daily.to_csv(f"{path}/All_2017_01_01_to_2023_07_06_daily.csv", index=False)
df_weekly.to_csv(f"{path}/All_2017_01_01_to_2023_07_06_weekly.csv", index=False)
df_monthly.to_csv(f"{path}/All_2017_01_01_to_2023_07_06_monthly.csv", index=False)

# PgAdming4 Connection

## Create tables for database

### Create conn and engine

In [None]:
conda remove psycopg2

In [3]:
import psycopg2
from sqlalchemy import create_engine,select, insert

conn = psycopg2.connect(database="binance",
			user='abdullahcay', password=12345,
			host='127.0.0.1', port='5434'
)

conn.autocommit = True
cursor = conn.cursor()

string = 'postgresql://abdullahcay:12345@localhost:5434/binance'
engine = create_engine(string)

SyntaxError: invalid syntax (2635119378.py, line 1)

In [None]:
table_name_list = ['hourly','daily','weekly','monthly']

for table_name in table_name_list: 
    table_query = f'''CREATE TABLE IF NOT EXISTS {table_name} (id serial PRIMARY KEY,
                        id_asset int,
                        timestamp TIMESTAMP,
                        open NUMERIC, 
                        high NUMERIC,
                        low NUMERIC,
                        close NUMERIC,
                        volume NUMERIC,
                        close_time TIMESTAMP,
                        quote_asset_volume NUMERIC,
                        number_of_trades NUMERIC,
                        taker_buy_base_asset_volume NUMERIC,
                        taker_buy_quote_asset_volume NUMERIC,
                        ignore NUMERIC),
                        FOREIGN KEY (id_asset) REFERENCES assets(id);'''
    
    cursor.execute(table_query)
    print(f'{table_name} table created.')

In [16]:
table_name = 'assets'
table_query = f'''CREATE TABLE IF NOT EXISTS {table_name} (
                        id_asset int PRIMARY KEY,
                        name VARCHAR(10));'''
cursor.execute(table_query)
print(f'{table_name} table created.')
    
df_assets.to_sql(f'{table_name}', engine, if_exists='append', index = False)
print(f'{table_name} table added to database.')

assets table created.
assets table added to database.


## Add tables to database

In [17]:
table_name_list = ['hourly','daily','weekly','monthly', 'assets']
df_list = [df_hourly, df_daily, df_weekly, df_monthly, df_assets]

#  Note:  if_exists can be append, replace, fail.  
for df, table_name in zip(df_list, table_name_list):
    df.to_sql(f'{table_name}', engine, if_exists='append', index = False)
    print(f'{table_name} table added to database.')

hourly table added to database.
daily table added to database.
weekly table added to database.
monthly table added to database.
assets table added to database.


## First Query for pgAdmin 

In [20]:
# Query
table_name_list = ['hourly','daily','weekly','monthly']
for table_name in table_name_list:
    sql01 = f'''SELECT id_asset,timestamp,close FROM {table_name} Order By timestamp DESC LIMIT 1'''
    cursor.execute(sql01)
    for i in cursor.fetchall():
        print(f'{table_name} table ==> {i}')

# Commit 
conn.commit()

hourly table ==> (1, datetime.datetime(2023, 7, 6, 7, 0), 30780.0)
daily table ==> (1, datetime.datetime(2023, 7, 6, 0, 0), 30786.57)
weekly table ==> (1, datetime.datetime(2023, 7, 3, 0, 0), 30785.95)
monthly table ==> (1, datetime.datetime(2023, 7, 1, 0, 0), 30778.23)


In [1]:
conda remove psycopg2

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: c:\Users\Besitzer\anaconda3

  removed specs:
    - psycopg2


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    abseil-cpp-20211102.0      |       hd77b12b_0         1.7 MB
    arrow-cpp-11.0.0           |  py310h7cee713_0         7.2 MB
    aws-c-common-0.4.57        |       ha925a31_1         147 KB
    aws-c-event-stream-0.1.6   |       hd77b12b_5          26 KB
    aws-checksums-0.1.9        |       ha925a31_0          50 KB
    aws-sdk-cpp-1.8.185        |       hd77b12b_0         2.5 MB
    bokeh-3.1.1                |  py310h9909e9c_0         6.7 MB
    boost-cpp-1.73.0           |      h2bbff1b_12          16 KB
    c-ares-1.19.0              |       h2bbff1b_0         117 KB
    cloudpickle-2.2.1          |  py310haa95532_0          


The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - defaults/win-64::intake==0.6.7=py310haa95532_0


In [2]:
pip uninstall psycopg2

Note: you may need to restart the kernel to use updated packages.




In [4]:
pip freeze

alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work
anaconda-client==1.11.2
anaconda-navigator==2.4.2
anaconda-project @ file:///C:/Windows/TEMP/abs_91fu4tfkih/croots/recipe/anaconda-project_1660339890874/work
anyio @ file:///C:/ci/anyio_1644481856696/work/dist
appdirs==1.4.4
argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work
argon2-cffi-bindings @ file:///C:/ci/argon2-cffi-bindings_1644569876605/work
arrow @ file:///C:/b/abs_cal7u12ktb/croot/arrow_1676588147908/work
astroid @ file:///C:/b/abs_d4lg3_taxn/croot/astroid_1676904351456/work
astropy @ file:///C:/ci/astropy_1657719642921/work
asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work
atomicwrites==1.4.0
attrs @ file:///C:/b/abs_09s3y775ra/croot/attrs_1668696195628/work
Automat @ file:///tmp/build/80754af9/automat_1600298431173/work
autopep8 @ file:///opt/conda/conda-bld/autopep8_1650463822033/work
Babel @ file:///C:/b/abs_a2shv_3tqi/croot/babel_1671782804377/work
backcall @ fil

