In [1]:
import pandas as pd



In [2]:
# Sample data set: one list per column, aligned by position
# (the n-th entry of every list describes the same coin).
data = {
    "Coin": ["Bitcoin", "Ethereum", "Ripple", "Litecoin"],
    "Price": [45_000, 3_000, 0.85, 120],
    "Volume": [15_000, 8_000, 200_000, 50_000],
}

# Every dictionary key becomes a column; rows receive the default 0..3 index.
df = pd.DataFrame.from_dict(data)
print(df)

       Coin     Price  Volume
0   Bitcoin  45000.00   15000
1  Ethereum   3000.00    8000
2    Ripple      0.85  200000
3  Litecoin    120.00   50000


#### Explore Data

##### Get basic info

In [3]:
# A bare expression on the last line of a cell is rendered by the notebook;
# this shows the whole (4-row) DataFrame.
df

Unnamed: 0,Coin,Price,Volume
0,Bitcoin,45000.0,15000
1,Ethereum,3000.0,8000
2,Ripple,0.85,200000
3,Litecoin,120.0,50000


In [4]:
# First 2 rows, printed as plain text.
print(df.head(2))

       Coin    Price  Volume
0   Bitcoin  45000.0   15000
1  Ethereum   3000.0    8000


In [5]:
# Last 2 rows, printed as plain text; the original index labels (2, 3) are kept.
print(df.tail(2))

       Coin   Price  Volume
2    Ripple    0.85  200000
3  Litecoin  120.00   50000


In [6]:
# First 3 rows, displayed as the cell's result rather than printed.
df.head(3)

Unnamed: 0,Coin,Price,Volume
0,Bitcoin,45000.0,15000
1,Ethereum,3000.0,8000
2,Ripple,0.85,200000


In [7]:
# Structural summary: index range, per-column non-null counts and dtypes,
# and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Coin    4 non-null      object 
 1   Price   4 non-null      float64
 2   Volume  4 non-null      int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 228.0+ bytes


### Basic Data Manipulation

In [9]:
# Select a subset of columns: indexing with a list of names returns a
# DataFrame containing only 'Coin' and 'Price'.

print(df[['Coin', 'Price']])

       Coin     Price
0   Bitcoin  45000.00
1  Ethereum   3000.00
2    Ripple      0.85
3  Litecoin    120.00


#### Filter Rows

In [11]:
# Boolean-mask filter: keep only the rows whose Price is greater than 200.
df.loc[df['Price'].gt(200)]

Unnamed: 0,Coin,Price,Volume
0,Bitcoin,45000.0,15000
1,Ethereum,3000.0,8000


#### Sorting Data

Sort by a column in ascending or descending order.

In [13]:
# Rows ordered by Volume, smallest first. The result is a new DataFrame;
# `df` itself keeps its original row order.
df.sort_values(by='Volume', ascending=True)

Unnamed: 0,Coin,Price,Volume
1,Ethereum,3000.0,8000
0,Bitcoin,45000.0,15000
3,Litecoin,120.0,50000
2,Ripple,0.85,200000


In [14]:
# Rows ordered by Price, largest first.
df.sort_values(by='Price', ascending=False)

Unnamed: 0,Coin,Price,Volume
0,Bitcoin,45000.0,15000
1,Ethereum,3000.0,8000
3,Litecoin,120.0,50000
2,Ripple,0.85,200000


#### Add new column

In [15]:
# Derived column: every price raised by 2%. The column is added to `df` in place.
df['Price_after_2Pct'] = df['Price'].mul(1.02)

In [16]:
# Display the DataFrame again to confirm the Price_after_2Pct column is present.
df

Unnamed: 0,Coin,Price,Volume,Price_after_2Pct
0,Bitcoin,45000.0,15000,45900.0
1,Ethereum,3000.0,8000,3060.0
2,Ripple,0.85,200000,0.867
3,Litecoin,120.0,50000,122.4


In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column 'Coin' is not included in the result.
df.describe()

Unnamed: 0,Price,Volume,Price_after_2Pct
count,4.0,4.0,4.0
mean,12030.2125,68250.0,12270.81675
std,22023.550649,89734.330108,22464.021662
min,0.85,8000.0,0.867
25%,90.2125,13250.0,92.01675
50%,1560.0,32500.0,1591.2
75%,13500.0,87500.0,13770.0
max,45000.0,200000.0,45900.0


In [19]:
# Write the DataFrame to a CSV file at a path relative to the current working
# directory; index=False leaves the 0..3 row labels out of the file.
df.to_csv('crypto_example_data.csv', index= False)

## Numpy

In [20]:
import numpy as np

#### Create a NumPy Array

A NumPy array is like a Python list, but it is optimized for mathematical operations.

In [21]:
# One-dimensional array holding four integer prices.
prices = np.array([45_000, 50_000, 40_000, 20_000])

# `shape` is a tuple with one entry per dimension; for this array it is (4,).
prices.shape

(4,)

In [25]:
# Printing an array shows its elements separated by spaces, without commas.
print(prices)

[45000 50000 40000 20000]


#### Data Type

In [26]:
# Element type shared by every item in the array (int64 in the recorded output).
prices.dtype

dtype('int64')

#### Basic data manipulation

In [27]:
# Element-wise addition: the scalar 1000 is added to every price, producing a
# new array and leaving `prices` unchanged.
prices_plus_1000 = np.add(prices, 1000)

prices_plus_1000

array([46000, 51000, 41000, 21000])

In [29]:
# Apply a 2% increase. `price_s` is only another name for `prices_plus_1000`
# (no copy is made), so the increase is applied to the already-raised prices.
price_s = prices_plus_1000

# Multiplying an integer array by a float yields a float array.
price_increase = np.multiply(price_s, 1.02)

price_increase

array([46920., 52020., 41820., 21420.])

In [31]:
# Boolean-mask filter: keep only the entries of `price_s` greater than 50000.
higher_price = price_s[np.greater(price_s, 50_000)]

higher_price

array([51000])

### Basic statistical functions

In [32]:
volume = np.array([500_000, 300_000, 200_000, 100_000])

print("\nStatistics for Volume: ")

# Each label is paired with the NumPy reduction that produces its value;
# the pairs are printed in the order listed.
for label, reducer in (
    ("Mean (Average): ", np.mean),  # average volume
    ("Minimum: ", np.min),          # smallest volume
    ("Maximum: ", np.max),          # largest volume
    ("Sum: ", np.sum),              # total of all entries in the array
    ("VOlatility: ", np.std),       # standard deviation of the volumes
):
    print(label, reducer(volume))


Statistics for Volume: 
Mean (Average):  275000.0
Minimum:  100000
Maximum:  500000
Sum:  1100000
VOlatility:  147901.9945774904


#### Working with a 2D Array

In [37]:
# Build a 2-D array from nested lists: three rows with two values each.
crypto_data = np.array(
    [[45_000, 5_000], [30_000, 28_000], [11_998, 8_943]]
)

print(crypto_data)

[[45000  5000]
 [30000 28000]
 [11998  8943]]


In [38]:
# Row access: index 2 selects the third row (indexing is zero-based) and
# `:` keeps every column of that row.
crypto_data[2, :]

array([11998,  8943])

In [39]:
# Column access: `:` takes every row and `1` picks the second column
# (indexing is zero-based), giving a one-dimensional array.

crypto_data[:, 1]

array([ 5000, 28000,  8943])