In [66]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import math
import warnings
warnings.simplefilter('ignore')

In [67]:
# Lift pandas' row cap so displayed frames are never truncated.
pd.options.display.max_rows = None

In [68]:
def decay(start, days, rate=0.25):
    """Scale ``start`` by a logistic decay factor that depends on elapsed days.

    The multiplier is ``2 * (1 - 1 / (1 + exp(-rate * days)))``. It equals
    exactly 1 at ``days == 0``, falls towards 0 as ``days`` grows, and rises
    towards 2 as ``days`` becomes very negative.

    Parameters
    ----------
    start : float
        Value to be scaled.
    days : float
        Elapsed time in days.
    rate : float, default 0.25
        Steepness of the logistic curve; the default reproduces the
        previously hard-coded constant.

    Returns
    -------
    float
        ``start`` multiplied by the decay factor.
    """
    try:
        growth = math.exp(-rate * days)
    except OverflowError:
        # exp() overflows for very negative ``rate * days``; the multiplier's
        # limit there is 2, which an infinite ``growth`` yields below.
        growth = math.inf
    mult = 2 * ((-1 / (1 + growth)) + 1)
    return start * mult

In [69]:
def custom_interpolate(df):
    """Fill missing 'Sentiment' values in place using ``decay``.

    Rows are visited in their stored order. Each row whose 'Sentiment' is
    NaN is replaced by ``decay(last_value, days_diff)``, where ``last_value``
    is the 'Sentiment' of the most recent row that was not NaN when visited,
    and ``days_diff`` is the whole-day gap between the two rows' 'fdate'
    values. Missing values before the first valid row are left untouched.

    Rows are addressed by position rather than by index label, so the frame
    does not need a default 0..n-1 index.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Sentiment' column and an 'fdate' column whose
        differences expose ``.days`` (i.e. datetime-like values).

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    sentiment_col = df.columns.get_loc('Sentiment')
    date_col = df.columns.get_loc('fdate')
    last_valid_pos = None  # Position of the most recent row holding a real value
    for pos in range(len(df)):
        if pd.isna(df.iloc[pos, sentiment_col]):
            if last_valid_pos is not None:
                # Days elapsed since the last real observation drive the decay.
                days_diff = (df.iloc[pos, date_col] - df.iloc[last_valid_pos, date_col]).days
                df.iloc[pos, sentiment_col] = decay(df.iloc[last_valid_pos, sentiment_col], days_diff)
        else:
            last_valid_pos = pos


In [70]:
def sentimentseries(ticker, path='wrdsdata.csv', daily_decay=0.90):
    """Build a gap-free daily sentiment series for one ticker.

    Reads the filings table at ``path``, keeps the 10-K / 10-Q rows whose
    'TICKERH' equals ``ticker``, and scores each filing as
    ``tanh(log((lm_positive / lm_negative) / lm_uncertainty))``. The filings
    are then laid out on a daily calendar running from the first to the last
    filing date. A day without a score takes the previous day's score
    multiplied by ``daily_decay``.

    Parameters
    ----------
    ticker : str
        Value matched exactly against the 'TICKERH' column.
    path : str, default 'wrdsdata.csv'
        CSV file with at least the columns 'form', 'TICKERH', 'fdate',
        'lm_positive', 'lm_negative' and 'lm_uncertainty'.
    daily_decay : float, default 0.90
        Per-day multiplier applied while carrying a score forward.

    Returns
    -------
    pandas.DataFrame
        One row per calendar day. The filing columns are populated only on
        filing dates; 'Sentiment' and 'TICKERH' are filled on every day.

    Raises
    ------
    ValueError
        If no 10-K / 10-Q filing exists for ``ticker``.
    """
    filings = pd.read_csv(path)
    wanted = filings['form'].isin(['10-K', '10-Q']) & filings['TICKERH'].isin([ticker])
    # Copy the selection so the column assignments below write to an
    # independent frame instead of a slice of ``filings``.
    ticker_filings = filings.loc[wanted].copy()
    if ticker_filings.empty:
        raise ValueError(f"no 10-K/10-Q filings found for ticker {ticker!r}")

    # The score is element-wise, so computing it after filtering gives the
    # same values without scoring every other company's filings.
    ticker_filings['Sentiment'] = np.tanh(np.log(
        (ticker_filings['lm_positive'] / ticker_filings['lm_negative'])
        / ticker_filings['lm_uncertainty']
    ))
    ticker_filings['fdate'] = pd.to_datetime(ticker_filings['fdate'])
    # A stable sort makes the row kept for a duplicated date deterministic:
    # the one that appears first in the file.
    ticker_filings = (
        ticker_filings
        .sort_values(by='fdate', kind='mergesort')
        .drop_duplicates(subset=['fdate'])
        .reset_index(drop=True)
    )

    # Daily calendar spanning the first to the last filing date.
    calendar = pd.DataFrame({
        'fdate': pd.date_range(start=ticker_filings['fdate'].min(),
                               end=ticker_filings['fdate'].max())
    })
    full_df = pd.merge(calendar, ticker_filings, on='fdate', how='left')

    # Carry the score forward, shrinking it by ``daily_decay`` each day.
    # Each value depends on the previous one, so this stays a loop, but it
    # runs over a NumPy array rather than per-row ``.loc`` lookups.
    sentiment = full_df['Sentiment'].to_numpy(dtype=float, copy=True)
    for pos in range(1, len(sentiment)):
        if np.isnan(sentiment[pos]):
            sentiment[pos] = sentiment[pos - 1] * daily_decay
    full_df['Sentiment'] = sentiment
    # Calendar-only rows have no ticker after the left merge.
    full_df['TICKERH'] = full_df['TICKERH'].ffill()

    return full_df

In [71]:
# Build the daily sentiment series for a single ticker (AAPL) as a quick check.
frame = sentimentseries('AAPL')

In [72]:
# Scrape every HTML table from the Wikipedia S&P 100 page (needs network access).
tables = pd.read_html("https://en.wikipedia.org/wiki/S%26P_100")
# NOTE(review): the table position (2) and the dropped row label (18) are
# hard-coded and depend on the page layout at scrape time. Row 18 is presumably
# a symbol with no usable match in the filings data — TODO confirm which one
# and select it by value rather than by position.
TICKERS = tables[2]['Symbol']
del TICKERS[18]

In [73]:
# Preview the first rows of the single-ticker series held in `frame`.
frame.head()

Unnamed: 0,fdate,cik,form,lm_negative,lm_positive,lm_uncertainty,TICKERH,Sentiment
0,2008-02-01,320193.0,10-Q,0.037281,0.006479,0.016638,AAPL,0.981832
1,2008-02-02,,,,,,AAPL,0.883649
2,2008-02-03,,,,,,AAPL,0.795284
3,2008-02-04,,,,,,AAPL,0.715756
4,2008-02-05,,,,,,AAPL,0.64418


In [74]:
# Build one combined daily sentiment frame covering every ticker in TICKERS.
overall = pd.DataFrame(columns=['fdate', 'cik', 'form', 'lm_negative', 'lm_positive', 'lm_uncertainty', 'TICKERH', 'Sentiment'])
# The empty template goes first so the result keeps these columns even when
# every ticker fails. Frames are collected and concatenated once at the end;
# concatenating inside the loop re-copies all accumulated rows per ticker.
frames = [overall]
skipped = []  # (ticker, exception) pairs for tickers that could not be built
for i in TICKERS:
    try:
        frame = sentimentseries(i)
    except Exception as exc:
        # Best effort: a ticker that cannot be built is skipped, but the
        # reason is recorded and reported instead of silently discarded.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        skipped.append((i, exc))
        print("Skipped: ", i, repr(exc))
        continue
    frames.append(frame)
    print("Done: ", i)
overall = pd.concat(frames, ignore_index=True)

Done:  AAPL
Done:  ABBV
Done:  ABT
Done:  ACN
Done:  ADBE
Done:  AIG
Done:  AMD
Done:  AMGN
Done:  AMT
Done:  AMZN
Done:  AVGO
Done:  AXP
Done:  BA
Done:  BAC
Done:  BK
Done:  BKNG
Done:  BLK
Done:  BMY
Done:  C
Done:  CAT
Done:  CHTR
Done:  CL
Done:  CMCSA
Done:  COF
Done:  COP
Done:  COST
Done:  CRM
Done:  CSCO
Done:  CVS
Done:  CVX
Done:  DE
Done:  DHR
Done:  DIS
Done:  DOW
Done:  DUK
Done:  EMR
Done:  F
Done:  FDX
Done:  GD
Done:  GE
Done:  GILD
Done:  GM
Done:  GOOGL
Done:  GS
Done:  HD
Done:  HON
Done:  IBM
Done:  INTC
Done:  INTU
Done:  JNJ
Done:  JPM
Done:  KHC
Done:  KO
Done:  LIN
Done:  LLY
Done:  LMT
Done:  LOW
Done:  MA
Done:  MCD
Done:  MDLZ
Done:  MDT
Done:  MET
Done:  META
Done:  MMM
Done:  MO
Done:  MRK
Done:  MS
Done:  MSFT
Done:  NEE
Done:  NFLX
Done:  NKE
Done:  NVDA
Done:  ORCL
Done:  PEP
Done:  PFE
Done:  PG
Done:  PM
Done:  PYPL
Done:  QCOM
Done:  RTX
Done:  SBUX
Done:  SCHW
Done:  SO
Done:  SPG
Done:  T
Done:  TGT
Done:  TMO
Done:  TMUS
Done:  TSLA
Done:  TXN
Don

In [77]:
# Inspect the last rows of the combined frame.
overall.tail()

Unnamed: 0,fdate,cik,form,lm_negative,lm_positive,lm_uncertainty,TICKERH,Sentiment
548164,2024-02-24,,,,,,XOM,5e-06
548165,2024-02-25,,,,,,XOM,4e-06
548166,2024-02-26,,,,,,XOM,4e-06
548167,2024-02-27,,,,,,XOM,4e-06
548168,2024-02-28,34088.0,10-K,0.012859,0.010424,0.006751,XOM,0.999861


In [78]:
# Persist the combined frame. With to_csv's default index=True the row index
# is written too, as a first column with an empty header.
overall.to_csv("final.csv")