In [1]:
#Load libraries
import pandas as pd
import numpy as np

In [2]:
#Read data to DataFrame
df = pd.read_json('currencies.json')

In [3]:
#Let's get a sense of the data
df.head()

Unnamed: 0,date,day_close,day_high,day_low,day_market_cap,day_open,day_volume,long_name,market_rank,short_name
0,2019-04-09,4.9e-05,4.9e-05,3.7e-05,1296018,4.2e-05,225,Elite,#1262,(1337)
1,2019-04-08,4.2e-05,4.6e-05,3.9e-05,1108819,4.3e-05,162,Elite,#1262,(1337)
2,2019-04-09,0.33109,0.335818,0.25941,-,0.26261,18090,1irstcoin,#1977,(FST)
3,2019-04-09,0.725394,0.733987,0.717767,-,0.72559,7063,1SG,#2009,(1SG)
4,2019-04-09,0.17721,0.20411,0.160943,-,0.177008,4125,COIN,#2023,(1X2)


In [4]:
#DataFrame structure
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 537630 entries, 0 to 537629
Data columns (total 10 columns):
date              537630 non-null datetime64[ns]
day_close         537630 non-null object
day_high          537630 non-null object
day_low           537630 non-null object
day_market_cap    537630 non-null object
day_open          537630 non-null object
day_volume        537630 non-null object
long_name         537630 non-null object
market_rank       537630 non-null object
short_name        537630 non-null object
dtypes: datetime64[ns](1), object(9)
memory usage: 41.0+ MB


In [5]:
#Clean market rank variable
df['market_rank'] = (
df['market_rank']
    .str.replace('#','')
    .astype(int)
)

#Clean short name variable
df['short_name'] = (
df['short_name']
    .str.replace('(','')
    .str.replace(')','')
)

#Clean day close variable
df['day_close'] =  (
df['day_close']
    .str.replace(',','')
    .astype(float)
)
#Check results
df.head()

Unnamed: 0,date,day_close,day_high,day_low,day_market_cap,day_open,day_volume,long_name,market_rank,short_name
0,2019-04-09,4.9e-05,4.9e-05,3.7e-05,1296018,4.2e-05,225,Elite,1262,1337
1,2019-04-08,4.2e-05,4.6e-05,3.9e-05,1108819,4.3e-05,162,Elite,1262,1337
2,2019-04-09,0.33109,0.335818,0.25941,-,0.26261,18090,1irstcoin,1977,FST
3,2019-04-09,0.725394,0.733987,0.717767,-,0.72559,7063,1SG,2009,1SG
4,2019-04-09,0.17721,0.20411,0.160943,-,0.177008,4125,COIN,2023,1X2


In [11]:
#Sort DataFrame
df.sort_values(['market_rank', 'date'], ascending = [True, False], inplace = True)
df.head()

Unnamed: 0,date,day_close,day_high,day_low,day_market_cap,day_open,day_volume,long_name,market_rank,short_name,close_diff
352561,2019-04-09,0.000103,0.000157,5.2e-05,3361593,0.000106,210,Pandacoin,576,PND,
352571,2019-04-08,0.000106,0.000158,0.000101,3436161,0.000101,1437,Pandacoin,576,PND,3e-06
352581,2019-04-07,0.000101,0.000154,0.0001,3296951,0.000101,117,Pandacoin,576,PND,-5e-06
352591,2019-04-06,0.000101,0.000152,0.0001,3299026,0.000103,177,Pandacoin,576,PND,0.0
352601,2019-04-05,0.000103,0.000151,9.8e-05,3347236,9.8e-05,233,Pandacoin,576,PND,2e-06


In [7]:
#Take first differnces
df['close_diff'] = df.groupby(['short_name'])['day_close'].transform(lambda x: x.diff())
df.head()

Unnamed: 0,date,day_close,day_high,day_low,day_market_cap,day_open,day_volume,long_name,market_rank,short_name,close_diff
532691,2019-04-09,0.039118,0.039295,0.012675,76143,0.012963,0,ZoZoCoin,1558,ZZC,
532701,2019-04-08,0.012963,0.016389,0.012916,25232,0.013727,5,ZoZoCoin,1558,ZZC,-0.026155
532710,2019-04-07,0.013724,0.022234,0.012722,26714,0.012732,65,ZoZoCoin,1558,ZZC,0.000761
532711,2019-04-06,0.012745,0.01966,0.012598,24808,0.01476,12,ZoZoCoin,1558,ZZC,-0.000979
532720,2019-04-05,0.014756,0.075004,0.014709,28723,0.015945,13,ZoZoCoin,1558,ZZC,0.002011


In [10]:
#Create example df
ethereum_df = df.loc[df['short_name'] == 'ETH']
ethereum_df.head()

Unnamed: 0,date,day_close,day_high,day_low,day_market_cap,day_open,day_volume,long_name,market_rank,short_name,close_diff
