In [84]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [85]:
# Load the two raw inputs from the working directory:
# the Fear & Greed index and the historical trade records.
fear_greed = pd.read_csv(filepath_or_buffer='fear_greed_index.csv')
historical_data = pd.read_csv(filepath_or_buffer='historical_data.csv')

In [86]:
# Explore the Fear & Greed Index: preview the first rows to see the
# available columns (timestamp, value, classification, date).
fear_greed.head()

Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05


In [87]:
# Inspect dtypes and non-null counts; 'date' is still a plain object
# (string) column at this point.
fear_greed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2644 entries, 0 to 2643
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   timestamp       2644 non-null   int64 
 1   value           2644 non-null   int64 
 2   classification  2644 non-null   object
 3   date            2644 non-null   object
dtypes: int64(2), object(2)
memory usage: 82.8+ KB


In [88]:
# Convert the string 'date' column to datetime64 so it can later be used
# as a join key against the parsed trade dates.
sentiment_dates = pd.to_datetime(fear_greed['date'])
fear_greed['date'] = sentiment_dates

In [89]:
# Re-check dtypes after the conversion: 'date' should now be datetime64[ns].
fear_greed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2644 entries, 0 to 2643
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   timestamp       2644 non-null   int64         
 1   value           2644 non-null   int64         
 2   classification  2644 non-null   object        
 3   date            2644 non-null   datetime64[ns]
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 82.8+ KB


In [90]:
# True if any row is an exact repeat of an earlier row (all columns compared).
fear_greed.duplicated().any()

np.False_

In [91]:
# Number of rows per sentiment label, most frequent first.
fear_greed['classification'].value_counts()

classification
Fear             781
Greed            633
Extreme Fear     508
Neutral          396
Extreme Greed    326
Name: count, dtype: int64

In [92]:
# Remove columns that are not needed for the merge, leaving only
# 'classification' and 'date'.
# The frame is reassigned instead of mutated with inplace=True, and
# errors='ignore' makes the cell safe to re-run: a second execution no
# longer raises KeyError because the columns are already gone.
fear_greed = fear_greed.drop(columns=['timestamp', 'value'], errors='ignore')

In [93]:
# Explore the historical trader data: preview the first rows to see the
# per-trade columns (account, price, size, side, timestamps, PnL, fee, ...).
historical_data.head()

Unnamed: 0,Account,Coin,Execution Price,Size Tokens,Size USD,Side,Timestamp IST,Start Position,Direction,Closed PnL,Transaction Hash,Order ID,Crossed,Fee,Trade ID,Timestamp
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,02-12-2024 22:50,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,02-12-2024 22:50,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,02-12-2024 22:50,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,02-12-2024 22:50,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,02-12-2024 22:50,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0


In [94]:
# Inspect dtypes and non-null counts for all raw trade columns.
historical_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211224 entries, 0 to 211223
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Account           211224 non-null  object 
 1   Coin              211224 non-null  object 
 2   Execution Price   211224 non-null  float64
 3   Size Tokens       211224 non-null  float64
 4   Size USD          211224 non-null  float64
 5   Side              211224 non-null  object 
 6   Timestamp IST     211224 non-null  object 
 7   Start Position    211224 non-null  float64
 8   Direction         211224 non-null  object 
 9   Closed PnL        211224 non-null  float64
 10  Transaction Hash  211224 non-null  object 
 11  Order ID          211224 non-null  int64  
 12  Crossed           211224 non-null  bool   
 13  Fee               211224 non-null  float64
 14  Trade ID          211224 non-null  float64
 15  Timestamp         211224 non-null  float64
dtypes: bool(1), float64(

In [95]:
# Number of COLUMNS that contain at least one missing value
# (.any() reduces each column to a bool, .sum() counts the True ones).
# Note: this is not the number of missing cells.
historical_data.isnull().any().sum()

np.int64(0)

In [96]:
# Drop identifier/bookkeeping columns that are not used in the analysis.
# The frame is reassigned instead of mutated with inplace=True, and
# errors='ignore' makes the cell safe to re-run: a second execution no
# longer raises KeyError because the columns are already gone.
historical_data = historical_data.drop(
    columns=[
        'Coin',
        'Direction',
        'Transaction Hash',
        'Order ID',
        'Trade ID',
        'Timestamp',
        'Crossed',
    ],
    errors='ignore',
)

#### Important columns related to the historical trader dataset

| Column            | Reason                                  |
| ----------------- | --------------------------------------- |
| `Account`         | To group by trader behavior             |
| `Execution Price` | To calculate trade values / trends      |
| `Size USD`        | Trade volume — critical                 |
| `Side`            | Buy/Sell behavior                       |
| `Timestamp IST`   | For date extraction and sentiment merge |
| `Closed PnL`      | For profitability analysis              |
| `Fee`             | To assess cost/risk of trades           |
| `Start Position`  | To analyze risk/exposure                |


In [97]:
# Confirm which columns remain after dropping the unused ones.
historical_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211224 entries, 0 to 211223
Data columns (total 9 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Account          211224 non-null  object 
 1   Execution Price  211224 non-null  float64
 2   Size Tokens      211224 non-null  float64
 3   Size USD         211224 non-null  float64
 4   Side             211224 non-null  object 
 5   Timestamp IST    211224 non-null  object 
 6   Start Position   211224 non-null  float64
 7   Closed PnL       211224 non-null  float64
 8   Fee              211224 non-null  float64
dtypes: float64(6), object(3)
memory usage: 14.5+ MB


In [98]:
# 'Timestamp IST' values look like "02-12-2024 22:50" (day-month-year, then
# time). Keep only the calendar-date token so trades can be joined to the
# sentiment data on a per-day key.
trade_day_text = historical_data['Timestamp IST'].str.split(" ").str[0]

# Parse with an explicit day-first format instead of format='mixed':
#   * 'mixed' + dayfirst=True guesses per element and can silently swap day
#     and month for ambiguous strings;
#   * errors='coerce' would turn malformed rows into NaT, which the later
#     inner merge would drop without any warning.
# With an explicit format, any row that does not match raises immediately.
# The result is already datetime64 at midnight, so no strftime round trip
# is needed.
historical_data['date'] = pd.to_datetime(trade_day_text, format='%d-%m-%Y')


In [99]:
# The raw timestamp text is no longer needed once 'date' has been derived.
# Reassigning with errors='ignore' keeps this cell safe to re-run after the
# column has already been removed.
historical_data = historical_data.drop(columns=['Timestamp IST'], errors='ignore')

In [100]:
# Verify the new 'date' column is datetime64[ns] with no missing values.
historical_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211224 entries, 0 to 211223
Data columns (total 9 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   Account          211224 non-null  object        
 1   Execution Price  211224 non-null  float64       
 2   Size Tokens      211224 non-null  float64       
 3   Size USD         211224 non-null  float64       
 4   Side             211224 non-null  object        
 5   Start Position   211224 non-null  float64       
 6   Closed PnL       211224 non-null  float64       
 7   Fee              211224 non-null  float64       
 8   date             211224 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(6), object(2)
memory usage: 14.5+ MB


In [101]:
# Count rows that exactly repeat an earlier row across all remaining columns.
# (.duplicated().any().sum() would collapse to a 0/1 flag that looks like a
# count; summing the boolean mask gives the real number of duplicate rows.)
historical_data.duplicated().sum()

np.int64(0)

In [102]:
# Merge both datasets: attach the sentiment classification to every trade
# by calendar date.
#   * how='inner' keeps only trades whose date has a sentiment entry; trades
#     on dates missing from fear_greed are dropped.
#   * validate='many_to_one' asserts that each date appears at most once in
#     fear_greed, so a duplicated sentiment date raises MergeError instead of
#     silently multiplying trade rows.
merged_data = pd.merge(
    historical_data,
    fear_greed,
    on='date',
    how='inner',
    validate='many_to_one',
)
merged_data.head()

Unnamed: 0,Account,Execution Price,Size Tokens,Size USD,Side,Start Position,Closed PnL,Fee,date,classification
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,7.9769,986.87,7872.16,BUY,0.0,0.0,0.345404,2024-12-02,Extreme Greed
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,7.98,16.0,127.68,BUY,986.524596,0.0,0.0056,2024-12-02,Extreme Greed
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,7.9855,144.09,1150.63,BUY,1002.518996,0.0,0.050431,2024-12-02,Extreme Greed
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,7.9874,142.98,1142.04,BUY,1146.558564,0.0,0.050043,2024-12-02,Extreme Greed
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,7.9894,8.73,69.75,BUY,1289.488521,0.0,0.003055,2024-12-02,Extreme Greed


In [104]:
# Save the new merged dataset.
# to_csv does not create missing directories, so make sure the output
# folder exists first; otherwise a fresh checkout fails with OSError.
merged_csv_path = Path('csv_files/merged_data.csv')
merged_csv_path.parent.mkdir(parents=True, exist_ok=True)
merged_data.to_csv(merged_csv_path, index=False)

In [None]:
# Perform EDA and Data visualization