**This is the final project in Data Analysis.**

Goals:
1. Analyze the cryptocurrency market in the selected time period
2. Try to predict the results of exchange rates (without focusing on events)

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import datetime as dt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import plotly.express as px
from plotly import tools
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go

import xgboost as xgb

from sklearn.linear_model import LinearRegression as LinReg
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

Read the data and drop the symbols

In [2]:
df = pd.read_csv("crypto-markets.csv")
df = df.drop(['symbol'], axis=1)

In [3]:
groupByDate = df.groupby('date', as_index=False).sum()  # by date
groupByName = df.groupby('name', as_index=False).sum()  # by name
groupByMarketCap = df.groupby('market', as_index=False).sum()  # by market capitalization
groupByVolume = df.groupby('volume', as_index=False).sum()  # by market volume
groupByRanknow = df.groupby('ranknow', as_index=False).sum()  # by rank
groupBySpread = df.groupby('spread', as_index=False).sum()  # by spread (is the $USD difference between the high and low values for the day)

In [4]:
groupByRanknow.head(10)

Unnamed: 0,ranknow,open,high,low,close,volume,market,close_ratio,spread
0,1,4666900.0,4807782.0,4507706.0,4670641.0,2961191000000.0,77295770000000.0,1104.9198,300076.35
1,2,317.5345,336.0558,298.3503,317.9459,385171500000.0,12230080000000.0,922.9499,37.15
2,3,258412.6,268531.0,246686.1,258434.7,1034702000000.0,25178030000000.0,626.107,21844.86
3,4,114.0851,120.8061,106.9132,114.2224,46632870000.0,2045230000000.0,757.8067,13.36
4,5,456412.5,484750.1,427623.7,455839.1,373338000000.0,7692101000000.0,232.9049,57126.41
5,6,3232.716,3406.773,3036.923,3233.056,309476000000.0,2466003000000.0,263.0976,369.89
6,7,63549.41,66406.01,60353.23,63554.7,282514800000.0,3336887000000.0,1004.3938,6052.78
7,8,1367.704,1375.792,1361.541,1367.835,1058689000000.0,878039600000.0,363.4101,14.13
8,9,2028.35,2433.83,1639.37,2033.75,2895873000.0,6984963000.0,9.8556,794.46
9,10,86.99238,92.56046,80.53808,86.97284,64881400000.0,2254232000000.0,211.8733,11.84


Traders still like to analyze the concept of HLC (and OHLC|HL) [proof](https://www.mypivots.com/dictionary/definition/92/hlc-3)

In [5]:
df['hl_average'] = (df['high'] + df['low']) / 2
df['hlc_average'] = (df['high'] + df['low'] + df['close']) / 3
df['ohlc_average'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4

Checking other currencies

In [6]:
top10 = df[(df['ranknow'] >= 1) & (df['ranknow'] <= 10)]
top10.name.unique()

array(['Bitcoin', 'XRP', 'Ethereum', 'Stellar', 'Bitcoin Cash', 'EOS',
       'Litecoin', 'Tether', 'Bitcoin SV', 'Cardano'], dtype=object)

*Volume* - All trades buys and sells that were made during that time (for example 24 hours like coinmarketcap does by default).

*Circulating supply* - number of coins mined and existing right now.

*Marketcap* = circulating supply multiply by price of coin.

In [7]:
fig = px.pie(top10, values='volume', names='name', title='Cryptocurrencies Top-10 by Transaction Volume')
fig.show()

In [8]:
fig = px.pie(top10, values='market', names='name', title='Cryptocurrencies Top-10 by Market capitalization')
fig.show()

In [9]:
fig = tools.make_subplots(subplot_titles=('Time'))
for name in top10.name.unique():
    currency = top10[top10['name'] == name]
    trace = go.Scatter(x=currency['date'], y=currency['ohlc_average'], name=name)
    fig.append_trace(trace, 1, 1)
    
fig['layout'].update(title='Top-10 Cryptocurrencies Comparison')
fig['layout']['yaxis1'].update(title='USD')
fig.show()


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



Adding minor cryptocurrencies that not affect too much on the market

In [10]:
top10minorCurrencies = df[(df['ranknow'] >= 11) & (df['ranknow'] <= 21)]

top10minorCurrencies.name.unique()

array(['Monero', 'TRON', 'IOTA', 'Dash', 'NEM', 'Binance Coin', 'NEO',
       'Ethereum Classic', 'Zcash', 'Bitcoin Gold', 'Tezos'], dtype=object)

In [11]:
fig = px.pie(top10minorCurrencies, values='volume', names='name', title='Minor Cryptocurrencies by Transaction Volume')
fig.show()

In [12]:
fig = px.pie(top10minorCurrencies, values='market', names='name', title='Minor Cryptocurrencies by Market capitalization')
fig.show()

In [13]:
top10loserCoins = df[(df['ranknow'] >= max(df['ranknow']) - 10) & (df['ranknow'] <= max(df['ranknow']))]

top10loserCoins.name.unique()

array(['ALLCOIN', 'EmaratCoin', 'Agrolot', 'ZTCoin', 'PlayCoin [ERC20]',
       'Dragon Token', 'OBXcoin', 'APOT', 'Bgogo Token',
       'UniversalRoyalCoin', 'EtherDelta Token'], dtype=object)

In [14]:
fig = px.pie(top10loserCoins, values='volume', names='name', title='Loser Cryptocurrencies by Transaction Volume')
fig.show()

In [15]:
fig = px.pie(top10loserCoins, values='market', names='name', title='Loser Cryptocurrencies by Market capitalization')
fig.show()