In [1]:
import os

import pandas as pd

This part prepares the list of earthquakes by dropping the events and columns that we are not going to include in the model. Events are filtered by time and by magnitude: our stock data start in 2008, and in our preliminary research we noticed that earthquakes below magnitude 6 do not have a noticeable effect (we ran models with thresholds lower than 6 and did not see much of a difference in the results).

In [None]:
# Clean the raw earthquake catalogue: keep only events dated on or after
# January 1, 2008 with magnitude >= 6, keep only the columns listed at the
# end of the cell, and write the result to the pre-processed data folder.
df = pd.read_csv('../../01_data/01_raw/Eartquakes-1990-2023.csv')

# Keep the untouched timestamp string next to the parsed pieces.
df['date_orig'] = df['date']

# Split at the first space only (n=1): a value containing more than one space
# would otherwise expand into three or more columns and break the two-column
# assignment.
df[['date', 'time']] = df['date'].str.split(' ', n=1, expand=True)

# Values that cannot be parsed become NaT; NaT fails the comparison below, so
# those rows are dropped together with the pre-2008 events.
df['date'] = pd.to_datetime(df['date'], errors='coerce')
cutoff = pd.Timestamp('2008-01-01')
min_magnitude = 6

# Removing all the earthquakes before January 1, 2008 and all the earthquakes
# below magnitude 6. The date comparison is inclusive (>=): only the part
# before the first space is parsed into 'date', so (assuming that part is a
# plain calendar date - TODO confirm against the raw file) a strict `>` would
# also discard every event dated January 1, 2008 itself.
is_major_and_recent = (df['date'] >= cutoff) & (df['magnitudo'] >= min_magnitude)
df = df.loc[is_major_and_recent].rename(columns={'magnitudo': 'magnitude'})

df = df[['date_orig', 'date', 'time', 'longitude', 'latitude', 'magnitude',
         'depth', 'significance', 'tsunami', 'state', 'data_type']]

# The row index is written too (to_csv default), so the file starts with an
# unnamed column holding the row labels from the raw file.
df.to_csv('../../01_data/02_pre/clean_major_earthquakes.csv')

This part combines all our stock market files into one.

In [None]:
# Load every yearly market file (2008-2023), merge them into one table,
# normalise the ticker symbols and keep only ticker, date, open and the
# intraday change (Close - Open).
dfs = []

for year in range(2008, 2024):
    file_name = f"../../01_data/01_raw/{year}_Global_Markets_Data.csv"
    try:
        dfs.append(pd.read_csv(file_name))
    except FileNotFoundError:
        # Best effort: a missing year is reported and skipped.
        print(f"File not found: {file_name}")

# pd.concat raises an opaque ValueError on an empty list; fail with the same
# exception type but say what actually went wrong.
if not dfs:
    raise ValueError(
        "No objects to concatenate: none of the yearly "
        "Global_Markets_Data files could be read"
    )

# Concatenate all DataFrames into a single DataFrame
df = pd.concat(dfs, ignore_index=True)

# Strip '^' and '=' from the ticker symbols and turn '000001.SS' into
# '000001SS'. The dot is escaped so it only matches a literal '.', and the
# replacement is limited to the Ticker column instead of every string column.
df['Ticker'] = df['Ticker'].replace(
    {r'\^': '', r'=': '', r'000001\.SS': '000001SS'}, regex=True
)

df['change'] = df['Close'] - df['Open']
df = df.rename(columns={'Date': 'date', 'Ticker': 'ticker', 'Open': 'open'})
df = df[['ticker', 'date', 'open', 'change']]

# The row index is written too (to_csv default), as an unnamed first column.
df.to_csv('../../01_data/02_pre/stocks.csv')

This part splits the combined stock market file into separate files, one for each market.

In [None]:
# Write one CSV per ticker (rows in their original relative order, no index
# column) into the per-index output folder.

# Define the full directory path
directory = "../../01_data/02_pre/01_index"

# Making sure the directory exists. This does not depend on the ticker, so it
# is done once before the loop; exist_ok=True makes re-running the cell safe.
os.makedirs(directory, exist_ok=True)

# Kept under its original name in case later cells refer to it - TODO confirm.
# Missing tickers are excluded: they match no rows and would only produce a
# header-only 'nan.csv'.
Tickers = df['ticker'].dropna().unique()

# groupby hands out each ticker's rows in a single pass instead of filtering
# the whole frame once per ticker; sort=False keeps first-appearance order and
# rows with a missing ticker are skipped by groupby's default.
for ticker, df_alt in df.groupby('ticker', sort=False):
    filename = f"{directory}/{ticker}.csv"
    df_alt.to_csv(filename, index=False)
