In [None]:
import pandas as pd
import matplotlib.pyplot as plt 

In [None]:
# Load the TSLA dataset from the working directory; the first CSV column becomes the row index.
data = pd.read_csv('TSLA.csv', index_col=0)

In [None]:

# Label of the first row whose 'Firm' value is not null (None when there is no such row).
first_index = data['Firm'].first_valid_index()

# Keep that row and everything after it, renumbering from zero; when no row has a
# firm the frame is left exactly as it was loaded.
data = data if first_index is None else data.loc[first_index:].reset_index(drop=True)


In [None]:

def create_event_dataframes(data):
    """Build one small event table per (firm, grade) pair found in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'Firm', 'ToGrade' and 'Date'.

    Returns
    -------
    dict
        Maps ``(firm, str(grade))`` to a one-row DataFrame with the columns
        'event' (``"<firm> <phrase>"``) and 'ds' (a list of 'YYYY-MM-DD'
        strings, one per matching row). Pairs without matching rows are omitted.
    """
    # Known grades and the phrase used for them; any other grade falls back
    # to "suggests <grade in lower case>".
    grade_phrases = {
        'Buy': 'suggests buy',
        'Sell': 'suggests sell',
        'Outperform': 'suggests outperform',
        'Underperform': 'suggests underperform',
        'Overweight': 'suggests overweight',
        'Underweight': 'suggests underweight',
        'Neutral': 'suggests hold',
        'Higher': 'suggests higher',
        'Lower': 'suggests lower',
    }

    frames_by_key = {}

    for firm_name in data['Firm'].unique():
        rows_for_firm = data.loc[data['Firm'] == firm_name]

        for grade in rows_for_firm['ToGrade'].unique():
            # Grades may be non-strings (e.g. NaN), so normalise before lookup.
            grade_label = str(grade)
            phrase = grade_phrases.get(grade_label, f'suggests {grade_label.lower()}')

            matching_dates = pd.to_datetime(
                rows_for_firm.loc[rows_for_firm['ToGrade'] == grade, 'Date']
            )
            if matching_dates.empty:
                continue

            occurrences = pd.DataFrame({
                'ds': matching_dates,
                'event': f"{firm_name} {phrase}",
            })

            # Collapse all occurrences into a single row holding the formatted dates.
            frames_by_key[(firm_name, grade_label)] = (
                occurrences
                .groupby('event')['ds']
                .agg(lambda stamps: list(stamps.dt.strftime('%Y-%m-%d')))
                .reset_index()
            )

    return frames_by_key

# Build the per-(firm, grade) event tables from the loaded data; later cells read `event_dfs`.
event_dfs = create_event_dataframes(data)


In [None]:
# Collapse the rating-level rows into one row per date.
# NOTE(review): 'Volume' is summed over every row that shares a Date. If the price columns are
# repeated on each rating row of that date, the sum multiplies the real volume - confirm
# against the CSV before trusting this column.
merged_df = data.groupby('Date').agg({
    'Open': 'first',               # First value of Open
    'High': 'max',                 # Max value of High
    'Low': 'min',                  # Min value of Low
    'Close': 'last',               # Last value of Close
    'Volume': 'sum',               # Total Volume
    'Dividends': 'first',          # First value of Dividends
    'Stock Splits': 'first',       # First value of Stock Splits
    'Reported EPS': 'first',
    'EPS Estimate': 'first',
    'Surprise(%)': 'first',
}).reset_index()

# Firms that issued at least one rating. Rows without a firm are skipped instead of
# producing "ToGrade nan"-style columns.
unique_firms = data['Firm'].dropna().unique()

# For every firm, record its first ToGrade / FromGrade / Action on each date.
# The values are looked up by Date. Assigning a groupby(...).transform(...) result directly
# would align on the original row labels, not on the date, and put ratings on the wrong rows.
firm_columns = {}
for firm in unique_firms:
    first_rating_per_date = (
        data.loc[data['Firm'] == firm]
        .drop_duplicates(subset='Date')   # keep the first row of each date
        .set_index('Date')
    )
    for field in ('ToGrade', 'FromGrade', 'Action'):
        # Column names follow "<field> <firm>" with no trailing space.
        firm_columns[f'{field} {firm}'] = merged_df['Date'].map(first_rating_per_date[field])

# Attach all new columns at once rather than inserting them one by one.
if firm_columns:
    merged_df = pd.concat(
        [merged_df, pd.DataFrame(firm_columns, index=merged_df.index)],
        axis=1,
    )

# From here on `data` is the per-date frame.
data = merged_df


In [None]:
# Fill the gaps in the earnings columns. `.ffill()` / `.bfill()` replace the deprecated
# `fillna(method=...)` spelling and produce the same values.

# Reported EPS is only known after the fact, so carry the last known value forward;
# rows before the first report become 0.
data['Reported EPS'] = data['Reported EPS'].ffill().fillna(0)

# The estimate is known ahead of time, so pull the next estimate backwards; rows after
# the last estimate reuse it (needed for future prediction).
data['EPS Estimate'] = data['EPS Estimate'].bfill().ffill()

# Surprise follows the reported value: carry forward, then 0 before the first report.
data['Surprise(%)'] = data['Surprise(%)'].ffill().fillna(0)

In [None]:
# Remove the corporate-action columns when every value in them is zero.
for column, message in (
    ('Dividends', "Dropping Dividends"),
    ('Stock Splits', "Dropping StockSplits"),
):
    if (data[column] == 0).all():
        print(message)
        data = data.drop(columns=[column])

In [None]:
from neuralprophet import NeuralProphet
# The plain Prophet import is left disabled; the cells visible here only use NeuralProphet.
#from prophet import Prophet

from neuralprophet import set_random_seed

# Fix the seed (0) before any model is created.
set_random_seed(0)


In [None]:
#epochs and learning_rate, and potentially increase the batch_size
def _new_model():
    """Return a fresh NeuralProphet instance with the hyper-parameters shared by all targets.

    Each target needs its own model object (sharing one instance errors), so this
    factory is called once per target rather than copying a single model.
    """
    return NeuralProphet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        epochs=7000,

        n_changepoints=20,
        trend_global_local="local",
        #growth="logistic", #currently unsupported feature
        seasonality_mode="multiplicative",

        batch_size=16,
        #learning_rate=0.00005,
        learning_rate=0.0001,
        #ar_layers=200

        #trainer_config={"accelerator": "mps"}
    )


# One independent model per forecast target.
Openmodel = _new_model()
Closemodel = _new_model()
Volumemodel = _new_model()
Reportedepsmodel = _new_model()



In [None]:

# Remove every per-firm rating column (names starting with ToGrade / FromGrade / Action)
# in a single drop.
rating_prefixes = ('ToGrade', 'FromGrade', 'Action')
data = data.drop(columns=[col for col in data.columns if col.startswith(rating_prefixes)])


In [None]:
# The date column is referred to as 'ds' by every later cell.
data = data.rename({'Date': 'ds'}, axis='columns')

In [None]:

# High and Low are dropped to make room for the EPS columns.
data = data.drop(columns=["High", "Low"])

In [None]:
# Give every model its own frame. A chained assignment would bind all four names (and
# `data`) to one object, so adding a column through any of them would alter the others.
Openmodeldata = data.copy()
Closemodeldata = data.copy()
Volumedata = data.copy()
Reportedepsdata = data.copy()

In [None]:

# Build each model's frame: its target column is exposed as 'y' and removed under its own
# name, while the other series stay available as regressors. `assign` returns a new frame,
# so `data` itself is never modified and the result does not depend on statement order.
Openmodeldata = Openmodeldata.assign(y=data['Open']).drop(columns="Open")
Closemodeldata = Closemodeldata.assign(y=data['Close']).drop(columns="Close")
Volumedata = Volumedata.assign(y=data['Volume']).drop(columns="Volume")
Reportedepsdata = Reportedepsdata.assign(y=data['Reported EPS']).drop(columns="Reported EPS")

In [None]:
#Did it this way because it uses functions that modify the neuralprophet object. I would have prefered just to modify data then copy
def add_events_to_model(event_dfs, model):
    """Register every event name found in ``event_dfs`` on ``model``.

    Parameters
    ----------
    event_dfs : dict
        Maps ``(firm, grade)`` to a DataFrame with the columns 'event' and 'ds'.
    model :
        Object exposing ``add_events(name)``; the value it returns replaces the
        current model for the following calls.

    Returns
    -------
    The model after all registrations. A ``ValueError`` raised for a single
    event is printed and the remaining events are still processed.
    """
    for (_firm, _grade), event_df in event_dfs.items():
        for event_name, event_dates in zip(event_df['event'], event_df['ds']):
            # Echo what is being registered (the dates are a list of strings).
            print(f"Adding event: {event_name} on dates: {event_dates}")

            try:
                model = model.add_events(event_name)
            except ValueError as e:
                print(f"Error adding event {event_name}: {e}")
            else:
                print(f"Successfully added event: {event_name}")

    return model

# Register every analyst event on each model.
Openmodel = add_events_to_model(event_dfs, Openmodel)
Closemodel = add_events_to_model(event_dfs, Closemodel)
Volumemodel = add_events_to_model(event_dfs, Volumemodel)
Reportedepsmodel = add_events_to_model(event_dfs, Reportedepsmodel)

# Add the event indicator columns to each training frame.
# The per-(firm, grade) tables keep all dates of an event as a list in a single 'ds' cell,
# so they are first flattened into ONE long table with one (event, ds) row per occurrence,
# which is the shape the NeuralProphet events examples use, and passed in a single call.
# NOTE(review): as far as I can tell, create_df_with_events rebuilds the column of every
# registered event from the table it receives, so calling it once per table would leave
# only the last table's event populated - verify against the installed version.
if event_dfs:
    all_events = (
        pd.concat(list(event_dfs.values()), ignore_index=True)
        .explode('ds', ignore_index=True)
    )
    all_events['ds'] = pd.to_datetime(all_events['ds'])

    Openmodeldata = Openmodel.create_df_with_events(Openmodeldata, all_events)
    Closemodeldata = Closemodel.create_df_with_events(Closemodeldata, all_events)
    Volumedata = Volumemodel.create_df_with_events(Volumedata, all_events)
    Reportedepsdata = Reportedepsmodel.create_df_with_events(Reportedepsdata, all_events)



In [None]:
#'Firm', 'ToGrade', 'FromGrade', 'Action'
def addregressorstotal(m, dataform):
    """Register US holidays and the regressors available in ``dataform`` on model ``m``.

    Parameters
    ----------
    m :
        Model exposing ``add_country_holidays``, ``add_lagged_regressor`` and
        ``add_future_regressor``; each call's return value is used for the next call.
    dataform : pandas.DataFrame
        Frame the model will be trained on. Only its column names are inspected.

    Returns
    -------
    The configured model, so the call can be used as ``m = addregressorstotal(m, df)``
    in the same way as ``add_events_to_model``. Existing callers that ignore the
    return value keep working.
    """
    m = m.add_country_holidays("US") #add holidays to our trends

    # Price/volume series use a 7-step history, reported EPS a single step. The model's
    # own target is absent from its frame (it is 'y'), so it is skipped automatically.
    for column, n_lags in (("Close", 7), ("Volume", 7), ("Open", 7), ("Reported EPS", 1)):
        if column in dataform.columns:
            m = m.add_lagged_regressor(column, n_lags=n_lags)

    # These two are registered unconditionally.
    m = m.add_lagged_regressor("Surprise(%)", n_lags=1)
    m = m.add_future_regressor("EPS Estimate", mode='additive') #Perhaps make "multiplicative" depending

    # Corporate-action columns may have been dropped earlier when all-zero.
    for column in ('Dividends', 'Stock Splits'):
        if column in dataform.columns:
            m = m.add_future_regressor(column, mode='additive')

    return m
# Configure each model against the frame it will be trained on.
for _model, _frame in (
    (Openmodel, Openmodeldata),
    (Closemodel, Closemodeldata),
    (Volumemodel, Volumedata),
    (Reportedepsmodel, Reportedepsdata),
):
    addregressorstotal(_model, _frame)


In [None]:
# Fit the Open model on its frame with early stopping enabled; the return value is kept as metricsOpen.
metricsOpen = Openmodel.fit(Openmodeldata, early_stopping = True)

In [None]:
# Fit the Close model on its frame with early stopping enabled; the return value is kept as metricsClose.
metricsClose = Closemodel.fit(Closemodeldata, early_stopping = True)

In [None]:
# Fit the Volume model on its frame with early stopping enabled; the return value is kept as metricsVolume.
metricsVolume = Volumemodel.fit(Volumedata, early_stopping = True)

In [None]:
# Fit the Reported-EPS model on its frame with early stopping enabled; the return value is kept as metricsReportedeps.
metricsReportedeps = Reportedepsmodel.fit(Reportedepsdata, early_stopping = True)

In [None]:
# In-sample predictions: every fitted model predicts over the frame it was trained on.
forecastOpen, forecastClose, forecastVolume, forecastReportedeps = [
    fitted.predict(frame)
    for fitted, frame in (
        (Openmodel, Openmodeldata),
        (Closemodel, Closemodeldata),
        (Volumemodel, Volumedata),
        (Reportedepsmodel, Reportedepsdata),
    )
]

In [None]:
# Plot the in-sample Open forecast with the model's own plotting helper.
Openmodel.plot(forecastOpen)

In [None]:
# Plot the in-sample Close forecast with the model's own plotting helper.
Closemodel.plot(forecastClose)

In [None]:
# Plot the in-sample Volume forecast with the model's own plotting helper.
Volumemodel.plot(forecastVolume)

In [None]:
# Plot the in-sample Reported-EPS forecast with the model's own plotting helper.
Reportedepsmodel.plot(forecastReportedeps)

In [None]:
# Independent working copies of the training frames for the roll-forward forecast.
future_dfOpen, future_dfClose, future_dfVolume, future_dfReportedeps = [
    frame.copy()
    for frame in (Openmodeldata, Closemodeldata, Volumedata, Reportedepsdata)
]
# Empty placeholder frame.
Cleanedfuture = pd.DataFrame()


In [None]:
# Inspect the 'Surprise(%)' column of the Open working copy.
print(future_dfOpen['Surprise(%)'])

In [None]:
import pandas as pd
import numpy as np
from pandas.tseries.offsets import BDay

# Assuming `data` is your training DataFrame
# Step 1: Clone the existing DataFrame
#forecast = pd.DataFrame()
def predictFuture(data, model, future_periods=1):
    """Predict over ``data`` extended by ``future_periods`` business days.

    The appended rows repeat the last known value of every non-date column
    (including 'y' when present), because the true future values are unknown.

    Parameters
    ----------
    data : pandas.DataFrame
        History with a 'ds' date column; strings are accepted and parsed.
        The frame itself is not modified.
    model :
        Fitted model exposing ``predict(df)``.
    future_periods : int, default 1
        Number of business days to append after the last date in ``data``.

    Returns
    -------
    Whatever ``model.predict`` returns for the extended frame.
    """
    #notes predict does 1 day behind cause it can't go forward so we have to move everything forward again
    future_df = data.copy()

    # Work with real timestamps so the business-day arithmetic below is valid even
    # when 'ds' is still text.
    future_df['ds'] = pd.to_datetime(future_df['ds'])

    # Business days that follow the last known date.
    last_date = future_df['ds'].max()
    future_dates = pd.date_range(start=last_date + BDay(1), periods=future_periods, freq=BDay())

    # Every non-date column repeats its last available value on the new rows.
    future_entries_df = pd.DataFrame({
        column: [data[column].iloc[-1]] * future_periods
        for column in data.columns
        if column != 'ds'
    })
    future_entries_df.insert(0, 'ds', future_dates)

    future_df = pd.concat([future_df, future_entries_df], ignore_index=True)

    # Ensure 'y' column is present for predictions
    if 'y' not in future_df.columns:
        future_df['y'] = np.nan

    return model.predict(future_df)


#This is the part that moves the data forward for next prediction
def extendData(data2extend, forecastedOpen, forecastedClose, forecastedVolume, forecastedEPS):
    """Append one business day to ``data2extend`` and fill it from the four forecasts.

    The frame is modified in place and also returned.

    Parameters
    ----------
    data2extend : pandas.DataFrame
        A model frame: it has 'ds', 'y', 'EPS Estimate', 'Surprise(%)' and three of
        'Open' / 'Close' / 'Volume' / 'Reported EPS'. The missing one is the series
        stored in 'y'.
    forecastedOpen, forecastedClose, forecastedVolume, forecastedEPS : pandas.DataFrame
        Forecast frames with a 'yhat1' column, aligned with ``data2extend`` by row label.

    Returns
    -------
    pandas.DataFrame
        ``data2extend`` itself, one row longer.
    """
    forecasts = {
        'Open': forecastedOpen,
        'Close': forecastedClose,
        'Volume': forecastedVolume,
        'Reported EPS': forecastedEPS,
    }

    # Date of the new row: the next business day after the last known date.
    last_date = data2extend['ds'].max()
    next_date = pd.date_range(start=last_date + BDay(1), periods=1, freq=BDay())[0]

    # Add an empty row at the end (assumes the row labels are 0..n-1 - TODO confirm).
    data2extend.loc[len(data2extend)] = [None] * len(data2extend.columns)
    new_row = data2extend.index[-1]

    for target in forecasts:
        if target in data2extend.columns:
            continue  # only the series that lives in 'y' is absent from the frame

        # Wherever a forecast has a prediction, it replaces the stored value; the series
        # that is this frame's target is written to 'y'.
        # NOTE(review): this also overwrites historical rows that have a prediction, not
        # only the new row - confirm that is intended.
        for name, forecast in forecasts.items():
            column = 'y' if name == target else name
            predicted = forecast['yhat1']
            data2extend[column] = data2extend[column].where(predicted.isna(), predicted)

        # NOTE(review): the new row's 'EPS Estimate' is still empty at this point, so the
        # value written here is NaN and the final forward fill replaces it with the
        # previous row's surprise. Kept as-is to preserve the model inputs.
        eps_column = 'y' if target == 'Reported EPS' else 'Reported EPS'
        reported = data2extend[eps_column].iloc[-1]
        estimate = data2extend['EPS Estimate'].iloc[-1]
        data2extend.loc[new_row, 'Surprise(%)'] = (reported - estimate) / estimate

    # Write the date through .loc; assigning into `.tail(1).values[0]` targets a temporary
    # array and is not guaranteed to reach the frame.
    data2extend.loc[new_row, 'ds'] = next_date

    data2extend.ffill(inplace=True) #Front fill needs to be add end, or we get drop columns error

    return data2extend
#####

# Start the roll-forward from fresh copies of the training frames.
future_dfOpen, future_dfClose, future_dfVolume, future_dfReportedeps = [
    frame.copy()
    for frame in (Openmodeldata, Closemodeldata, Volumedata, Reportedepsdata)
]

# First one-step-ahead forecast for every target.
forecastOpen, forecastClose, forecastVolume, forecastReportedeps = [
    predictFuture(frame, fitted)
    for frame, fitted in (
        (future_dfOpen, Openmodel),
        (future_dfClose, Closemodel),
        (future_dfVolume, Volumemodel),
        (future_dfReportedeps, Reportedepsmodel),
    )
]

# Each round appends the latest predictions to all four frames and then forecasts one
# more step from the extended frames. `count` ends at `periods`.
periods = 7
for count in range(1, periods + 1):
    # All four frames are extended with the forecasts of the previous round before any
    # forecast is refreshed.
    future_dfOpen, future_dfClose, future_dfVolume, future_dfReportedeps = [
        extendData(frame, forecastOpen, forecastClose, forecastVolume, forecastReportedeps)
        for frame in (future_dfOpen, future_dfClose, future_dfVolume, future_dfReportedeps)
    ]
    forecastOpen, forecastClose, forecastVolume, forecastReportedeps = [
        predictFuture(frame, fitted)
        for frame, fitted in (
            (future_dfOpen, Openmodel),
            (future_dfClose, Closemodel),
            (future_dfVolume, Volumemodel),
            (future_dfReportedeps, Reportedepsmodel),
        )
    ]




In [None]:
# Show selected columns of the latest Open forecast: date, y, yhat1 and the two
# lagged-regressor columns named below.
forecastOpen[['ds','y','yhat1','lagged_regressor_Surprise(%)1', 'lagged_regressor_Reported EPS1']]

In [None]:
# Print the name of every forecast column that mentions 'EPS'.
for i in filter(lambda name: 'EPS' in name, forecastOpen.columns):
    print(i)

In [None]:
#forecast = Closemodel.predict(future_dfClose)


In [None]:
#forecast[['ds','y','yhat1']]