In [2]:
pip install statsmodels

Collecting statsmodels
  Downloading statsmodels-0.14.4-cp311-cp311-macosx_10_9_x86_64.whl.metadata (9.2 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Downloading statsmodels-0.14.4-cp311-cp311-macosx_10_9_x86_64.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0mm
[?25hDownloading patsy-1.0.1-py2.py3-none-any.whl (232 kB)
Installing collected packages: patsy, statsmodels
Successfully installed patsy-1.0.1 statsmodels-0.14.4
Note: you may need to restart the kernel to use updated packages.


In [17]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from datetime import timedelta

# Load datasets
average_data = pd.read_csv("/Users/erolberkiyibozkurt/Documents/GitHub/Python/Projects/The Influence of AI Boom on Tech Stocks/Extracted Data/stock_timeseries_usd_full.csv")
ai_events = pd.read_csv("/Users/erolberkiyibozkurt/Documents/GitHub/Python/Projects/The Influence of AI Boom on Tech Stocks/Background Data/timeline_of_the_ai.csv")

# Ensure date columns are in datetime format
average_data['Date'] = pd.to_datetime(average_data['Date'], errors='coerce')
ai_events['Date'] = pd.to_datetime(ai_events['Date'], errors='coerce')

# Rename columns to match the dataset
average_data.rename(columns={
    'Adj Close': 'Adj_Close',
    'Close': 'Close_Price',
    'High': 'High_Price',
    'Low': 'Low_Price',
    'Open': 'Open_Price'
}, inplace=True)

# Filter average data for dates after the earliest AI event
earliest_event_date = ai_events['Date'].min()
filtered_average_data = average_data[average_data['Date'] >= earliest_event_date]

# Initialize a list to store regression results
regression_results = []

for event in ai_events['AI Event'].unique():
    # Filter data around the event
    event_date = ai_events.loc[ai_events['AI Event'] == event, 'Date'].iloc[0]
    event_window = filtered_average_data[
        (filtered_average_data['Date'] >= (event_date - timedelta(days=7))) &
        (filtered_average_data['Date'] <= (event_date + timedelta(days=7)))
    ]
    
    # Skip if event_window is empty
    if event_window.empty:
        continue
    
    # Prepare data for regression
    event_window['AI_Event_Impact'] = (event_window['Date'] == event_date).astype(int)
    event_window['Constant'] = 1  # Add constant term
    X = event_window[['Constant', 'AI_Event_Impact']]
    y = event_window['Adj_Close']
    
    # Perform regression
    model = sm.OLS(y, X).fit()
    regression_results.append((event, model.rsquared, event_window, model))

# Sort regression results by R-squared to find the most and least impactful events
sorted_results = sorted(regression_results, key=lambda x: x[1], reverse=True)
most_impactful = sorted_results[:4]
least_impactful = sorted_results[-4:]

# Plot and save graphs for each impactful event
for i, (event, _, data, model) in enumerate(most_impactful):
    plt.figure(figsize=(8, 6))
    plt.scatter(data['AI_Event_Impact'], data['Adj_Close'], alpha=0.6, label='Data Points')
    plt.plot(data['AI_Event_Impact'], model.predict(), color='red', label='Regression Line')
    plt.title(f"Most Impactful Event {i+1}: {event} (R² = {model.rsquared:.3f})")
    plt.xlabel("AI Event Impact")
    plt.ylabel("Average Adj Close Price")
    plt.legend()
    plt.savefig(f"most_impactful_event_{i+1}.png")
    plt.close()

for i, (event, _, data, model) in enumerate(least_impactful):
    plt.figure(figsize=(8, 6))
    plt.scatter(data['AI_Event_Impact'], data['Adj_Close'], alpha=0.6, label='Data Points')
    plt.plot(data['AI_Event_Impact'], model.predict(), color='red', label='Regression Line')
    plt.title(f"Least Impactful Event {i+1}: {event} (R² = {model.rsquared:.3f})")
    plt.xlabel("AI Event Impact")
    plt.ylabel("Average Adj Close Price")
    plt.legend()
    plt.savefig(f"least_impactful_event_{i+1}.png")
    plt.close()

# Correlation analysis
correlation = filtered_average_data['Adj_Close'].corr(filtered_average_data['Date'].astype(int))
print(f"Correlation between Adj Close and Date: {correlation}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  event_window['AI_Event_Impact'] = (event_window['Date'] == event_date).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  event_window['Constant'] = 1  # Add constant term
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  event_window['AI_Event_Impact'] = (event_window['Date'] == event_date).a

Correlation between Adj Close and Date: 0.10219974919309975
