In [1]:
# Importing libraries
import pandas as pd
import numpy as np

## Load Fear Greed Index

In [2]:
# Read the Fear Greed Index CSV; the relative path is resolved against the
# notebook's current working directory.
fear_greed = pd.read_csv(filepath_or_buffer="fear_greed_index.csv")

In [3]:
# Replace the CSV header with snake_case names that match the assignment.
# The assignment is positional, so it relies on the file carrying exactly
# these four columns in this order.
fear_greed.columns = [
    'timestamp',
    'index_value',
    'classification',
    'date',
]

In [4]:
# Print a banner, then display the first five rows as a quick sanity check
# of the load and the new column names.
print("--- Fear Greed Index Load (First 5 rows) ---")
fear_greed.head(n=5)

--- Fear Greed Index Load (First 5 rows) ---


Unnamed: 0,timestamp,index_value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05


In [5]:
# Print a banner, then the per-column dtype / non-null summary.
# buf=None makes info() write to sys.stdout (its default destination).
print("\n--- Fear Greed Index Info ---")
fear_greed.info(buf=None)


--- Fear Greed Index Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2644 entries, 0 to 2643
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   timestamp       2644 non-null   int64 
 1   index_value     2644 non-null   int64 
 2   classification  2644 non-null   object
 3   date            2644 non-null   object
dtypes: int64(2), object(2)
memory usage: 82.8+ KB


In [6]:
# Parse the 'date' strings into pandas datetime values, overwriting the
# original object-dtype column in place.
fear_greed['date'] = pd.to_datetime(arg=fear_greed['date'])

In [7]:
# Drop the 'timestamp' column, as 'date' is the more usable representation.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been removed is a no-op instead of raising KeyError.
fear_greed = fear_greed.drop(columns=['timestamp'], errors='ignore')

In [8]:
# Persist the cleaned frame to disk; index=False keeps the row index out of
# the written file.
fear_greed.to_csv(path_or_buf='fear_greed_cleaned.csv', index=False)

## Load Historical Trader Data

In [9]:
# Read the historical trader CSV; the relative path is resolved against the
# notebook's current working directory.
historical_data = pd.read_csv(filepath_or_buffer="historical_data.csv")

In [10]:
# Replace the CSV header with snake_case names that match the assignment.
# The original columns are: Account, Coin, Price, Size, Value, Side, Time,
# Start Position, Event, Closed PnL, Tx Hash, Tx Index, Is Maker, Fee,
# Trade ID, Timestamp.
# The assignment is positional, so it relies on the file carrying exactly
# these 16 columns in this order.
historical_data.columns = [
    'account',
    'symbol',
    'execution_price',
    'size',
    'value',
    'side',
    'time_raw',
    'start_position',
    'event',
    'closed_pnl',
    'tx_hash',
    'tx_index',
    'is_maker',
    'fee',
    'trade_id',
    'timestamp',
]

In [11]:
# Print a banner, then display the first five rows as a quick sanity check
# of the load and the new column names.
print("\n--- Historical Trader Data Load (First 5 rows) ---")
historical_data.head(n=5)


--- Historical Trader Data Load (First 5 rows) ---


Unnamed: 0,account,symbol,execution_price,size,value,side,time_raw,start_position,event,closed_pnl,tx_hash,tx_index,is_maker,fee,trade_id,timestamp
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,02-12-2024 22:50,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,02-12-2024 22:50,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,02-12-2024 22:50,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,02-12-2024 22:50,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,02-12-2024 22:50,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0


In [12]:
# Print a banner, then the per-column dtype / non-null summary.
# buf=None makes info() write to sys.stdout (its default destination).
print("\n--- Historical Trader Data Info ---")
historical_data.info(buf=None)


--- Historical Trader Data Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211224 entries, 0 to 211223
Data columns (total 16 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   account          211224 non-null  object 
 1   symbol           211224 non-null  object 
 2   execution_price  211224 non-null  float64
 3   size             211224 non-null  float64
 4   value            211224 non-null  float64
 5   side             211224 non-null  object 
 6   time_raw         211224 non-null  object 
 7   start_position   211224 non-null  float64
 8   event            211224 non-null  object 
 9   closed_pnl       211224 non-null  float64
 10  tx_hash          211224 non-null  object 
 11  tx_index         211224 non-null  int64  
 12  is_maker         211224 non-null  bool   
 13  fee              211224 non-null  float64
 14  trade_id         211224 non-null  float64
 15  timestamp        211224 non-null  float64
dtypes

In [13]:
# Parse the raw trade-time strings into a new 'time' datetime column.
# The strings are day-first ('DD-MM-YYYY HH:MM', e.g. "18-03-2025 12:50"), so
# the format is given explicitly rather than left to inference.
historical_data['time'] = pd.to_datetime(
    historical_data['time_raw'],
    format='%d-%m-%Y %H:%M',
)

In [14]:
# Drop the raw string column 'time_raw'; its parsed counterpart is 'time'.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been removed is a no-op instead of raising KeyError.
historical_data = historical_data.drop(columns=['time_raw'], errors='ignore')

In [15]:
# Columns that are coerced to numeric dtypes by the conversion loop.
numeric_cols = [
    'execution_price',
    'size',
    'value',
    'start_position',
    'closed_pnl',
    'fee',
    'trade_id',
    'timestamp',
]

In [16]:
for col in numeric_cols:
    # A column may have been read as object dtype if it holds mixed values;
    # errors='coerce' turns anything unparseable into NaN instead of raising.
    coerced = pd.to_numeric(historical_data[col], errors='coerce')
    historical_data[col] = coerced

In [17]:
# Drop rows where any of the key numeric columns is missing, e.g. values that
# failed numeric conversion and were coerced to NaN (such as a stray header row).
# Reassignment is used instead of inplace=True: pandas discourages inplace,
# it prevents method chaining, and the resulting rows are the same.
historical_data = historical_data.dropna(subset=['execution_price', 'size', 'closed_pnl'])
    

In [18]:
# Persist the cleaned frame to disk; index=False keeps the row index out of
# the written file.
historical_data.to_csv(path_or_buf='historical_data_cleaned.csv', index=False)