In [1]:
import os
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from timezonefinder import TimezoneFinder
from pytz import timezone

In [None]:
#Load the stock log; 'Unnamed: 0' is dropped right away (presumably a leftover index column from an earlier export)
df_log = pd.read_csv('../../01_data/02_pre/stocks.csv').drop(columns=['Unnamed: 0'])

#Daily change expressed as a percentage of the opening price
df_log['change_pct'] = df_log['change'].mul(100).div(df_log['open'])

#Bounds for our 'effect': mean and standard deviation of the percentage change
#over all rows that share the same date
bounds = (
    df_log
    .groupby('date')['change_pct']
    .agg(['mean', 'std'])
    .reset_index()
)

In [None]:
#Ensure the output directory exists
output_dir = '../../01_data/03_analysis/'
os.makedirs(output_dir, exist_ok=True)

df_markets = pd.read_csv('../../01_data/02_pre/markets_info.csv')
df_eq = pd.read_csv('../../01_data/02_pre/clean_major_earthquakes.csv')

#Earthquake timestamps are normalised to timezone-aware UTC once, up front.
#Timezone-naive values are interpreted as UTC; aware values are converted. Without this,
#comparing naive timestamps with the timezone-aware market closes below raises a TypeError.
df_eq['date'] = pd.to_datetime(df_eq['date'], format='mixed', utc=True)

#We use this to find the timezone of the headquarters of each market
tf = TimezoneFinder()

#Names of the earthquake summary columns, in the order they are added to each ticker frame
SUMMARY_COLUMNS = ['num', 'sum', 'max_mag', 'max_sig', 'min_depth', 'min_dist', 'tsunami']


def _classify_effect(frame):
    """Return the 'effect' category for every row of `frame`.

    `frame` must provide the columns 'change_pct', 'mean' and 'std'.
    The categories compare the row's change with the same-date bounds:

    * 0: change_pct >= mean - std
    * 1: mean - 2*std <= change_pct < mean - std
    * 2: mean - 3*std <= change_pct < mean - 2*std
    * 3: change_pct < mean - 3*std

    Rows where any of the three inputs is missing get a missing effect (<NA>).
    Every comparison with NaN is False, so without the mask such rows would
    silently fall through to the most extreme category 3.

    Returns a nullable-integer ('Int64') Series aligned with `frame.index`.
    """
    change = frame['change_pct']
    mean = frame['mean']
    std = frame['std']

    conditions = [
        mean - std <= change,
        (mean - 2 * std <= change) & (change < mean - std),
        (mean - 3 * std <= change) & (change < mean - 2 * std),
    ]
    effect = pd.Series(np.select(conditions, [0, 1, 2], default=3), index=frame.index, dtype='Int64')

    is_defined = change.notna() & mean.notna() & std.notna()
    return effect.where(is_defined)


def _summarise_window(eq_window, market_coords):
    """Summarise the earthquakes of one time window into a dict keyed by SUMMARY_COLUMNS.

    `eq_window` is a slice of the earthquake frame; `market_coords` is the
    (latitude, longitude) tuple of the market. For an empty window the count
    is 0 and every other summary value is NaN.
    """
    if eq_window.empty:
        summary = dict.fromkeys(SUMMARY_COLUMNS, np.nan)
        summary['num'] = 0
        return summary

    #Geodesic distance (km) from the market to the closest earthquake of the window
    nearest_km = min(
        geodesic((eq_lat, eq_lon), market_coords).km
        for eq_lat, eq_lon in zip(eq_window['latitude'], eq_window['longitude'])
    )
    return {
        'num': len(eq_window),
        'sum': eq_window['magnitudo'].sum(),
        'max_mag': eq_window['magnitudo'].max(),
        'max_sig': eq_window['significance'].max(),
        'min_depth': eq_window['depth'].min(),
        'min_dist': nearest_km,
        'tsunami': eq_window['tsunami'].sum(),
    }


#This loop creates all the necessary independent variables for our regressions
for _, market_row in df_markets.iterrows():
    ticker = market_row['ticker']

    #Start of the section for calculating the 'effect' column for the multinomial logistic regression
    ticker_data = df_log.loc[df_log['ticker'] == ticker, ['date', 'change_pct']]
    df_alt_ticker = pd.merge(ticker_data, bounds, on='date')
    df_alt_ticker['effect'] = _classify_effect(df_alt_ticker)
    df_alt_ticker['date'] = pd.to_datetime(df_alt_ticker['date'], errors='coerce').dt.date
    #End of the section for variables for multinomial logistic regression

    latitude = float(market_row['Latitude'])
    longitude = float(market_row['Longitude'])
    market_coords = (latitude, longitude)

    #Closing time as an offset from local midnight; it is the same for every trading day of this market
    close_time = pd.to_datetime(market_row['close'], format='%H:%M', errors='coerce').time()
    close_delta = pd.Timedelta(hours=close_time.hour, minutes=close_time.minute)

    #Finding the timezone
    timezone_str = tf.timezone_at(lat=latitude, lng=longitude)
    if timezone_str is None:
        raise ValueError(f"No timezone found for ticker {ticker} at ({latitude}, {longitude})")
    tz = timezone(timezone_str)

    df_ticker = pd.read_csv(f'../../01_data/02_pre/01_index/{ticker}.csv')

    #Converting the date column to the timezone of the market
    df_ticker['date'] = pd.to_datetime(df_ticker['date']).dt.tz_localize(tz)

    #Start of summarizing earthquakes to our independent variables
    #Time window: after the previous row's close and up to (including) this row's close.
    #NOTE(review): the first row has no previous close, so its window has no lower bound
    #and counts every earlier earthquake in df_eq - confirm this is intended.
    window_summaries = []
    prev_close = None
    for curr_close in df_ticker['date'] + close_delta:
        in_window = df_eq['date'] <= curr_close
        if prev_close is not None:
            in_window &= df_eq['date'] > prev_close
        window_summaries.append(_summarise_window(df_eq[in_window], market_coords))
        prev_close = curr_close

    summary = pd.DataFrame(window_summaries, columns=SUMMARY_COLUMNS, index=df_ticker.index)

    df = df_ticker.copy()
    for column in SUMMARY_COLUMNS:
        df[column] = summary[column]
    #Back to plain calendar dates (in the market's timezone) so the merge keys match df_alt_ticker
    df['date'] = df['date'].dt.date

    #Merging the two DataFrames created in the loop
    merged_df = pd.merge(df_alt_ticker, df, left_on='date', right_on='date', how='inner')
    merged_df = merged_df.drop(columns=['mean', 'std'])
    #Reordering columns
    merged_df = merged_df[['date', 'change', 'change_pct', 'effect'] + SUMMARY_COLUMNS]

    output_file = os.path.join(output_dir, f'{ticker}.csv')
    merged_df.to_csv(output_file, index=False)
    print(f"Saved data for ticker {ticker} to {output_file}")

Saved data for ticker NYA to ../../01_data/03_analysis/NYA.csv
Saved data for ticker IXIC to ../../01_data/03_analysis/IXIC.csv
Saved data for ticker FTSE to ../../01_data/03_analysis/FTSE.csv
Saved data for ticker NSEI to ../../01_data/03_analysis/NSEI.csv
Saved data for ticker BSESN to ../../01_data/03_analysis/BSESN.csv
Saved data for ticker N225 to ../../01_data/03_analysis/N225.csv
Saved data for ticker 000001SS to ../../01_data/03_analysis/000001SS.csv
Saved data for ticker N100 to ../../01_data/03_analysis/N100.csv
Saved data for ticker DJI to ../../01_data/03_analysis/DJI.csv
Saved data for ticker GSPC to ../../01_data/03_analysis/GSPC.csv
