In [None]:
# Importing pandas
import pandas as pd

# Importing matplotlib and setting aesthetics for plotting later.
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg' 
plt.style.use('fivethirtyeight')

# Load the coinmarketcap.com snapshot that was uploaded as a JSON file
current = pd.read_json(path_or_buf='../input/crypto.json')

# Preview the first five rows (the default number shown by head)
current.head(n=5)

In [None]:
# Keep only the coin identifier and its market capitalization in USD
market_cap = current.loc[:, ['id', 'market_cap_usd']]

# Non-null entries per column (NaN values are not counted)
market_cap.count()


Some cryptocurrencies listed on coinmarketcap.com have no known market capitalization. This is represented by NaN in the data, and NaNs are not counted by count(). These cryptocurrencies are of little interest to us in this analysis, so they are safe to remove.

In [None]:
# Keep only the coins with a strictly positive market cap. The comparison is
# False for NaN, so coins without a known capitalization are dropped as well
# (unlike a plain dropna(), this also excludes zero-valued caps).
has_positive_cap = market_cap['market_cap_usd'] > 0
cap = market_cap[has_positive_cap]

# Counting the number of values again
cap.count()

At the time of writing, Bitcoin is under serious competition from other projects, but it is still dominant in market capitalization. Let's plot the market capitalization for the top 10 coins as a barplot to better visualize this.

In [None]:
# Declaring these now for later use in the plots
TOP_CAP_TITLE = 'Top 10 market capitalization'
TOP_CAP_YLABEL = '% of total cap'

# Total capitalization of every coin that has a known, positive market cap
total_cap_usd = cap['market_cap_usd'].sum()

# Ten largest coins, indexed by id, with their share of the total cap in percent.
# The ranking is built from the cleaned `cap` frame so that it matches the
# denominator, and the percentage is computed from the 'market_cap_usd' column
# explicitly instead of dividing the whole frame (which only worked because
# the frame happened to contain a single column).
cap10 = (
    cap
    .sort_values(by='market_cap_usd', ascending=False)
    .head(10)
    .set_index('id')
    .assign(market_cap_perc=lambda df: df['market_cap_usd'] / total_cap_usd * 100)
)

# Plotting the barplot with the title defined above
ax = cap10.plot.bar(y='market_cap_perc', title=TOP_CAP_TITLE, rot=60)

# Annotating the y axis with the label defined above
ax.set_ylabel(TOP_CAP_YLABEL);

While the plot above is informative enough, it can be improved. Bitcoin is too big, and the other coins are hard to distinguish because of this. Instead of the percentage, let's use a log10 scale of the "raw" capitalization. Plus, let's use color to group similar coins and make the plot more informative.

For the colors rationale: bitcoin-cash and bitcoin-gold are forks of the bitcoin blockchain. 

Ethereum and Cardano both offer Turing Complete smart contracts. 

Iota and Ripple are not minable. Dash, Litecoin, and Monero get their own color.

In [None]:
# One color per bar, in the same order as the rows of cap10
COLORS = ['orange', 'green', 'orange', 'cyan', 'cyan', 'blue', 'silver', 'orange', 'red', 'green']

# Plot the market_cap_usd column as a Series on a log10 y-axis. Plotting the
# Series (rather than the DataFrame with y=...) makes pandas apply the color
# list bar by bar; for a DataFrame the colors are assigned per column.
# `color` is the supported keyword; the former `colors` spelling is rejected
# by current pandas/matplotlib releases.
ax = cap10['market_cap_usd'].plot.bar(color=COLORS, logy=True, title=TOP_CAP_TITLE, rot=60)

# Annotating the y axis with 'USD'
ax.set_ylabel("USD")

# Final touch! Removing the xlabel as it is not very informative
ax.set_xlabel("");

**Volatility**

Volatility means the liability to change rapidly and unpredictably, especially for the worse. We know this to be especially true for cryptocurrencies.

We want to explore this volatility by selecting and plotting the 24 hours and 7 days percentage changes.




In [None]:
# Build the volatility table in a single chain:
#   1. keep the id plus the 24h and 7d percentage changes,
#   2. index by id and discard every row containing a NaN,
#   3. order the rows from the largest 24h loss to the largest 24h gain.
volatility = (
    current
    .loc[:, ['id', 'percent_change_24h', 'percent_change_7d']]
    .set_index('id')
    .dropna()
    .sort_values(by='percent_change_24h')
)

# Checking the first few rows
volatility.head()


Let's plot the top 10 biggest gainers and top 10 losers in market capitalization **during 24h.**

In [None]:
def top10_subplot(volatility_series, title):
    """Draw the first 10 and last 10 rows of sorted data as two side-by-side bar charts.

    Parameters
    ----------
    volatility_series : pandas Series or DataFrame
        Percentage changes indexed by coin id. The data is expected to be
        sorted in ascending order already, so that the first 10 rows are the
        biggest losers and the last 10 rows the biggest winners.
    title : str
        Main title shown above both subplots.

    Returns
    -------
    tuple
        ``(fig, ax)`` where ``fig`` is the figure and ``ax`` is the axes of
        the right-hand (winners) subplot.
    """
    # plt.subplots creates the figure itself; a preceding plt.figure() call
    # would only leave an extra, empty figure behind on every call.
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))

    # Setting the figure's main title to the text passed as parameter
    fig.suptitle(title)

    # Left panel: the 10 first rows (top losers), positional slice
    ax = volatility_series.iloc[:10].plot.bar(colormap='autumn', ax=axes[0])
    ax.set_ylabel("% change")

    # Right panel: the 10 last rows (top winners)
    ax = volatility_series.iloc[-10:].plot.bar(ax=axes[1], colormap='winter')

    return fig, ax

DTITLE = "24 hours top losers and winners"

# The frame is already sorted by the 24h change, so its column can be passed as is
daily_change = volatility['percent_change_24h']
fig, ax = top10_subplot(daily_change, DTITLE)

Let's plot the top 10 biggest gainers and top 10 losers in market capitalization **in a week.**

In [None]:
WTITLE = "Weekly top losers and winners"

# Weekly changes ordered from the biggest loss to the biggest gain
# (sort_values sorts in ascending order by default)
volatility7d = volatility['percent_change_7d'].sort_values()

# Calling the top10_subplot function
fig, ax = top10_subplot(volatility7d, WTITLE)

To help understand volatility and how risky cryptocurrencies are, let's plot the 24-hour change rate together with the 7-day change rate for the week's 10 worst and 10 best performing cryptocurrencies.

In [None]:
# Sorting both change columns by the weekly change, in ascending order
volatilityTogether = volatility.sort_values(by='percent_change_7d', ascending=True)

# Title for the combined plot (typo "Weekling" corrected)
WTITLE = "Comparing Weekly with Daily losers and winners"

# Passing the whole frame plots the 24h and 7d changes next to each other
fig, ax = top10_subplot(volatilityTogether, WTITLE)

The last thing I would like to do for now is build a bar plot classifying the currencies according to Investopedia's market cap definitions:

Large cap: +10 billion.
Mid cap: 2 billion - 10 billion.
Small cap: 300 million - 2 billion.
Micro cap: 50 million - 300 million.
Nano cap: Below 50 million.



In [None]:
def capcount(query_string):
    """Count the coins in the ``cap`` DataFrame that match a query.

    ``query_string`` is a pandas query expression (for example
    ``'market_cap_usd<50e+6'``). The result is the number of non-null ``id``
    values among the matching rows.
    """
    per_column_counts = cap.query(query_string).count()
    return per_column_counts['id']

# Labels for the plot
LABELS = ["Large", "Mid", "Small", "Micro", "Nano"]

# Class boundaries from the Investopedia definitions, in USD:
#   10 billion = 10e+9, 2 billion = 2e+9, 300 million = 300e+6, 50 million = 50e+6.
# Each class includes its lower bound and excludes its upper bound, so every
# coin falls into exactly one class.
Large = capcount('market_cap_usd>=10e+9')
Mid = capcount('market_cap_usd>=2e+9 & market_cap_usd<10e+9')
Small = capcount('market_cap_usd>=300e+6 & market_cap_usd<2e+9')
Micro = capcount('market_cap_usd>=50e+6 & market_cap_usd<300e+6')
Nano = capcount('market_cap_usd<50e+6')

# Making a list with the 5 counts, in the same order as LABELS
values = [Large, Mid, Small, Micro, Nano]

# Plotting them with matplotlib
plt.bar(LABELS, values, color=['red', 'blue', 'green', 'Orange', 'purple']);

Unsurprisingly, most of them have a market cap below 50 million.

Still, I would like to regroup the results a bit so that the cryptocurrencies with a larger market cap are taken into account as well:

In [None]:
# capcount(query_string) is already defined in the previous classification
# cell (it counts the matching rows of the "cap" DataFrame), so it is reused
# here instead of being declared a second time.

# Labels for the plot
LABELS = ["biggish", "micro", "nano"]

# Each group includes its lower bound and excludes its upper bound, so a coin
# sitting exactly on 50e+6 or 300e+6 is counted once rather than twice.

# Using capcount count the biggish cryptos (300 million USD and above)
biggish = capcount('market_cap_usd>=300e+6')

# Same as above for micro (50 million up to, but not including, 300 million) ...
micro = capcount('market_cap_usd>=50e+6 & market_cap_usd<300e+6')

# ... and for nano (below 50 million)
nano = capcount('market_cap_usd<50e+6')

# Making a list with the 3 counts, in the same order as LABELS
values = [biggish, micro, nano]

# Plotting them with matplotlib
plt.bar(LABELS, values, color=['red', 'blue', 'green']);