In [1]:
import seaborn as sns
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# from matplotlib.lines import Line2D
import re

import plotly as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# modeling
from sklearn.model_selection import train_test_split

pd.set_option('display.max_columns', 500)
title_fontsize = 15

In [183]:
stocks

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Stock Name
0,147.639999,150.639999,144.839996,149.839996,149.363785,146691400,AAPL
1,146.100006,146.720001,140.679993,142.479996,142.027176,128138200,AAPL
2,141.279999,143.100006,138.000000,138.199997,137.760773,124925300,AAPL
3,138.210007,143.070007,137.690002,142.449997,141.997284,114311700,AAPL
4,145.029999,146.220001,144.259995,146.100006,145.635681,87830100,AAPL
...,...,...,...,...,...,...,...
45435,13.730000,14.600000,13.400000,13.740000,13.740000,23489400,XPEV
45436,13.090000,13.892000,12.860000,13.710000,13.710000,28279600,XPEV
45437,14.280000,14.830000,14.070000,14.370000,14.370000,27891300,XPEV
45438,14.580000,14.800000,13.580000,13.710000,13.710000,21160800,XPEV


In [184]:
# assigning dfs to files
df1 = pd.read_csv('./data/cleaned_up_data/all_stock_sentiment.csv').iloc[:, 1:].dropna().drop_duplicates()
df2 = pd.read_csv('./data/cleaned_up_data/scraped_sentiment_dframes.csv').iloc[:, 3:].dropna().drop_duplicates()
df3 = pd.read_csv('./data/cleaned_up_data/scraped_sentiment_extra.csv').iloc[:, 1:].dropna().drop_duplicates()
df4 = pd.read_csv('./data/cleaned_up_data/scraped_sentiment_extra2.csv').iloc[:, 1:].dropna().drop_duplicates()
stocks = pd.read_csv('./data/cleaned_up_data/scraped_stock_2015_2023.csv')

# combining and cleaning
df1.columns = ['Text', 'Datetime', 'Stock Name', 'sentiment']
df = pd.concat([df2, df1, df3, df4], ignore_index=True)
df['Datetime'] = pd.to_datetime(df.Datetime, utc=True).dt.date
df = df.drop_duplicates().sort_values('Datetime').reset_index(drop=True)

# remove unwanted companies/years
df = df[~(pd.to_datetime(df.Datetime).dt.year == 2014)]
df = df[~(df['Stock Name'] == 'F')]
df = df[~(df['Stock Name'] == 'INTC')]
df = df[~(df['Stock Name'] == 'ENPH')]
df = df[~(df['Stock Name'] == 'AMD')]
df = df[~(df['Stock Name'] == 'XPEV')]
df = df[~(df['Stock Name'] == 'VZ')]

# convert stocks datetime to date-type
stocks.Date = pd.to_datetime(stocks.Date).dt.date

df2 = df.copy()


Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.



In [191]:
# check balance of dataset

dff = df2.copy()
dff[['Datetime', 'Stock Name']]
dff['Datetime'] = pd.to_datetime(dff.Datetime).dt.year

dff = dff.groupby(['Datetime', 'Stock Name']).count().reset_index()
dff.pivot(index='Stock Name', columns='Datetime', values='sentiment')

Datetime,2015,2016,2017,2018,2019,2020,2021,2022,2023
Stock Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,298640.0,365980.0,185627.0,192194.0,186868.0,49775.0,49747.0,51806.0,4955.0
AMD,4948.0,4996.0,4960.0,4487.0,14081.0,4984.0,2395.0,1121.0,4918.0
AMZN,80594.0,104223.0,110263.0,103469.0,90536.0,49800.0,49484.0,49413.0,4960.0
BA,4964.0,1329.0,3119.0,2928.0,2566.0,3380.0,2031.0,3962.0,4966.0
BX,1786.0,1701.0,1467.0,1240.0,1229.0,1210.0,1229.0,1416.0,996.0
COST,2363.0,2005.0,1958.0,1803.0,1758.0,1779.0,1677.0,1685.0,1497.0
CRM,3488.0,3274.0,3529.0,2563.0,1832.0,2494.0,2018.0,1753.0,4970.0
DIS,1695.0,1652.0,920.0,1131.0,1501.0,1288.0,1185.0,1304.0,4962.0
ENPH,1451.0,1488.0,1488.0,1498.0,1497.0,1453.0,1504.0,1608.0,1495.0
GOOG,126183.0,133284.0,73876.0,71082.0,62196.0,49425.0,49061.0,50020.0,4962.0


In [186]:
# setting up the data to be passed to the chart

df = df2.copy()
df['label'] = df['sentiment'].apply(lambda x: 'bullish' if x > 0.5 else 'bearish')

sent_avg_sum = df.groupby(['Datetime', 'Stock Name'], as_index=False).agg({
    'sentiment': ['mean', 'count']})
sent_avg_sum.columns = ['Datetime', 'Stock Name', 'sentiment avg', 'sent_total']

sent_count = df.groupby(['Datetime', 'Stock Name', 'label'], as_index=False)['sentiment'].count().rename(
    columns={'sentiment': 'sent_count'})

df = pd.merge(sent_avg_sum, sent_count, on=['Datetime', 'Stock Name'])

df['sentiment pct'] = df['sent_count'] / df['sent_total']
df.drop(['sent_count', 'sent_total'], axis=1, inplace=True)
df.rename(columns={'Datetime': 'Date'}, inplace=True)

df = df.merge(stocks, on=['Date', 'Stock Name'])

In [188]:
df

Unnamed: 0,Date,Stock Name,sentiment avg,label,sentiment pct,Open,High,Low,Close,Adj Close,Volume
0,2015-01-02,AAPL,0.688692,bearish,0.306551,27.847500,27.860001,26.837500,27.332500,24.603210,212818400
1,2015-01-02,AAPL,0.688692,bullish,0.693449,27.847500,27.860001,26.837500,27.332500,24.603210,212818400
2,2015-01-02,AMZN,0.643557,bearish,0.345455,15.629000,15.737500,15.348000,15.426000,15.426000,55664000
3,2015-01-02,AMZN,0.643557,bullish,0.654545,15.629000,15.737500,15.348000,15.426000,15.426000,55664000
4,2015-01-02,CRM,0.781111,bearish,0.222222,59.900002,60.430000,58.509998,59.240002,59.240002,2796400
...,...,...,...,...,...,...,...,...,...,...,...
46652,2023-03-01,VZ,0.863954,bullish,0.866667,38.709999,38.709999,38.150002,38.299999,38.299999,18401100
46653,2023-03-01,XPEV,0.947021,bearish,0.058824,9.210000,9.250000,8.670000,8.860000,8.860000,12847800
46654,2023-03-01,XPEV,0.947021,bullish,0.941176,9.210000,9.250000,8.670000,8.860000,8.860000,12847800
46655,2023-03-01,ZS,0.921528,bearish,0.043478,131.149994,131.699997,126.500000,128.440002,128.440002,1750400


In [189]:
df.to_csv('./data/cleaned_up_data/final_stock_tweets_summary.csv', index_label=False)

In [181]:
stocks.iloc[]

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Stock Name
0,2015-01-02,14.858000,14.883333,14.217333,14.620667,14.620667,71466000,TSLA
1,2015-01-05,14.303333,14.433333,13.810667,14.006000,14.006000,80527500,TSLA
2,2015-01-06,14.004000,14.280000,13.614000,14.085333,14.085333,93928500,TSLA
3,2015-01-07,14.223333,14.318667,13.985333,14.063333,14.063333,44526000,TSLA
4,2015-01-08,14.187333,14.253333,14.000667,14.041333,14.041333,51637500,TSLA
...,...,...,...,...,...,...,...,...
45435,2022-09-22,13.730000,14.600000,13.400000,13.740000,13.740000,23489400,XPEV
45436,2022-09-23,13.090000,13.892000,12.860000,13.710000,13.710000,28279600,XPEV
45437,2022-09-26,14.280000,14.830000,14.070000,14.370000,14.370000,27891300,XPEV
45438,2022-09-27,14.580000,14.800000,13.580000,13.710000,13.710000,21160800,XPEV


In [172]:
pd.concat([STM, MTM]).min()
# pd.concat([STM, MTM]).max()

0.49567809450588896

In [174]:
def update_line_chart(company, df):

    if company == 'All':
        # change this when you're done with testing
        company = 'AAPL'
        
    data = df[df['Stock Name'] == company]

    STM = data['sentiment avg'].rolling(50).mean() #short term sentiment
    MTM = data['sentiment avg'].rolling(200).mean() #long term sentiment
    data['SMA30'] = data['sentiment avg'].rolling(30).mean()
    data['SMA90'] = data['sentiment avg'].rolling(90).mean()
    
    data['label'] = np.where(data['SMA30']>data['SMA90'], 1, 0)
    data['group'] = data['label'].ne(data['label'].shift()).cumsum()

    # create subplot layout
    fig = make_subplots(
        rows=4, cols=1, 
        row_heights=[1.5, 1.5, 1.5, 1.5],
        vertical_spacing=0.1,
        specs=[[{"secondary_y": True, 'rowspan': 2}], 
               [None],
               [{'rowspan': 1}],
               [{'rowspan': 1}]]
    )
    
    
######## subplot 1: sentiment v. stock ########

    # subplot 1A: candlestick trace
    fig.add_trace(
        go.Candlestick(x=data['Date'],
                    open=data['Open'],
                    high=data['High'],
                    low=data['Low'],
                    close=data['Close'],
                    name=""),
        secondary_y=True)
    
    # subplot 1B: grouping colors by trace crosses
    combined1 = data.copy()
    combined = data.groupby('group')
    combined_s = []
    for _, dta in combined:
        combined_s.append(dta)

    # custom function to set fill colors
    def fillcol(label):
        if label >= 1:
            return 'rgba(0,250,0,0.4)'
        else:
            return 'rgba(250,0,0,0.4)'

    # subplot 1B: make moving average lines transparrent
    for df in combined_s:
        fig.add_traces(go.Scatter(x=df.Date, y = df.SMA30,
                                line = dict(color='rgba(0,0,0,0)'),
                                hoverinfo='skip'
                                ))
        
        fig.add_traces(go.Scatter(x=df.Date, y = df.SMA90,
                                line = dict(color='rgba(0,0,0,0)'),
                                fill='tonexty', 
                                fillcolor = fillcol(df['label'].iloc[0]),
                                hoverinfo='skip'
                                ))

    # subplot 1B: add colors for traces that cross MA
    fig.add_traces(go.Scatter(x=combined1.Date, y = combined1.SMA30,
                            line = dict(color = 'green', width=1), 
                            name='MA30', hoverinfo='skip'
                            ))

    fig.add_traces(go.Scatter(x=combined1.Date, y = combined1.SMA90,
                            line = dict(color = 'red', width=1), 
                            name='MA90', hoverinfo='skip'
                            ))

######## subplot 2: sentiment ########

    trace1 = go.Line(
        x = data['Date'],
        y = STM,
        name='Short Term',
        marker_line_width=0,
        marker_color='orange',
        )
    trace2 = go.Line(
        x = data['Date'],
        y = MTM,
        name='Long Term',
        marker_line_width=0,
        marker_color='blue',
        )

    fig.add_traces([trace1, trace2], rows=3, cols=1)

######## subplot 3: stock volume ########
    stock_vol = go.Bar(
        x = data['Date'],
        y = data['Volume'],
        name = 'Volume',
        marker_color='black',
    )
    fig.add_trace(stock_vol, row=4, col=1)
    
######## subplot layouts ########

    # Set title
    fig.layout.update(title=f'{company} Stock Price v. Sentiment',
                     showlegend=True, hovermode='closest')

    # Set axis titles
    fig.update_xaxes(title_text="Date", row=1, col=1)

    fig.update_yaxes(title_text="Stock Price", secondary_y=True, row=1, col=1)
    fig.update_yaxes(title_text="Stock Sentiment", secondary_y=False, row=1, col=1)
    fig.update_yaxes(title_text="Sentiment", secondary_y=False, row=3, col=1)
    fig.update_yaxes(title_text="Volume", secondary_y=False, row=4, col=1)

    # hiding the bottom range window
    fig.update_layout(xaxis_rangeslider_visible=False)

    # updating y-axis ranges for the subplot
    company_dct = {
        'AAPL': {'sentiment': [0.6, 0.8], 'volume': [0, 500000000]}
    }
    
    try:
        
        fig.update_yaxes(tickmode='array', tickvals=company_dct[company]['sentiment'], row=3, col=1)
        fig.update_yaxes(tickmode='array', tickvals=company_dct[company]['volume'], row=4, col=1)
        fig.update_yaxes(range=company_dct[company]['volume'], secondary_y=False, row=4, col=1)
        
    except:
        pass
    
    fig.update_layout(
    # width=500,
    # height=850,
    showlegend=False,
    hovermode='x unified', 
    template='plotly_white',
    legend=dict(
        x=0,
        y=1.05,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="black"
        )),
    )
    fig.update_traces(xaxis='x1')
    return fig

fig = update_line_chart('AAPL', df)
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [47]:
data[data['label'] == 'bearish']['sentiment pct'] * 100

0       30.655130
2       34.343434
4       27.371865
6       28.278689
8       21.883173
          ...    
3267    50.000000
3269    40.000000
3271    27.272727
3273    25.000000
3275    60.655738
Name: sentiment pct, Length: 1580, dtype: float64

In [27]:
pd.to_datetime('2017-12-11').date()

datetime.date(2017, 12, 11)

In [96]:
company = 'AAPL'
data = df[df['Stock Name'] == company]

data['SMA30'] = data['sentiment avg'].rolling(30).mean()
data['SMA90'] = data['sentiment avg'].rolling(90).mean()    
data['label'] = np.where(data['SMA30']>data['SMA90'], 1, 0)
data['group'] = data['label'].ne(data['label'].shift()).cumsum()

fig = make_subplots(rows=4, cols=1, specs=[[{"secondary_y": True, 'rowspan': 2}], 
                                           [None],
                                           [{'rowspan': 1}],
                                           [{'rowspan': 1}]], vertical_spacing=0.05)

fig.add_trace(
    go.Candlestick(x=data['Date'],
                open=data['Open'],
                high=data['High'],
                low=data['Low'],
                close=data['Close'],
                name=""),
    secondary_y=True,
)

combined1 = data.copy()

# split data into chunks where averages cross each other
combined = data.groupby('group')
combined_s = []
for _, dta in combined:
    combined_s.append(dta)

# custom function to set fill color
def fillcol(label):
    if label >= 1:
        return 'rgba(0,250,0,0.4)'
    else:
        return 'rgba(250,0,0,0.4)'

for df in combined_s:
    fig.add_traces(go.Scatter(x=df.Date, y = df.SMA30,
                            line = dict(color='rgba(0,0,0,0)'),
                            hoverinfo='skip'
                            ))

    fig.add_traces(go.Scatter(x=df.Date, y = df.SMA90,
                            line = dict(color='rgba(0,0,0,0)'),
                            fill='tonexty', 
                            fillcolor = fillcol(df['label'].iloc[0]),
                            hoverinfo='skip'
                            ))

# include averages
fig.add_traces(go.Scatter(x=combined1.Date, y = combined1.SMA30,
                        line = dict(color = 'green', width=1), 
                        name='MA30', hoverinfo='skip'
                        ))

fig.add_traces(go.Scatter(x=combined1.Date, y = combined1.SMA90,
                        line = dict(color = 'red', width=1), 
                        name='MA90', hoverinfo='skip'
                        ))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

# couldnt seem to get the charts to show up

In [6]:
def update_line_chart2(company):

    if company == 'All':
        # change this when you're done with testing
        company = 'AAPL'

    df_comp = combined_df[combined_df['company'] == company]
    df_comp = df_comp.groupby('date', as_index=False)['sentiment'].mean()
    df_comp['SMA30'] = df_comp['sentiment'].rolling(30).mean()
    df_comp['SMA90'] = df_comp ['sentiment'].rolling(90).mean()
    df_comp = df_comp.rename(columns={'date': 'Date'})

    stock_comp = stocks_df[stocks_df['Stock Name'] == company]
    stock_comp['Date'] = pd.to_datetime(stock_comp['Date'])
    stock_comp['Date'] = stock_comp['Date'].dt.date
    combined = pd.merge(df_comp, stock_comp, how='left', on="Date")
    #data = combined.to_dict("records")
    # Create figure with secondary y-axis
    fig = make_subplots(rows=4, cols=1, specs=[[{"secondary_y": True, 'rowspan': 2}], 
                                               [None],
                                               [{'rowspan': 1}],
                                               [{'rowspan': 1}]], vertical_spacing=0.05)

    # Add traces
    fig.add_trace(
        go.Candlestick(x=combined['Date'],
                    open=combined['Open'],
                    high=combined['High'],
                    low=combined['Low'],
                    close=combined['Close'],
                    name=""),
        secondary_y=True,
    )
    
    combined1 = combined.copy()

    # split data into chunks where averages cross each other
    combined['label'] = np.where(combined['SMA30']>combined['SMA90'], 1, 0)
    combined['group'] = combined['label'].ne(combined['label'].shift()).cumsum()
    combined2 = combined.groupby('group')
    combined_s = []
    for name, data in combined2:
        combined_s.append(data)

    # custom function to set fill color
    def fillcol(label):
        if label >= 1:
            return 'rgba(0,250,0,0.4)'
        else:
            return 'rgba(250,0,0,0.4)'

    for df in combined_s:
        fig.add_traces(go.Scatter(x=df.Date, y = df.SMA30,
                                line = dict(color='rgba(0,0,0,0)'),
                                hoverinfo='skip'
                                ))
        
        fig.add_traces(go.Scatter(x=df.Date, y = df.SMA90,
                                line = dict(color='rgba(0,0,0,0)'),
                                fill='tonexty', 
                                fillcolor = fillcol(df['label'].iloc[0]),
                                hoverinfo='skip'
                                ))

    # include averages
    fig.add_traces(go.Scatter(x=combined1.Date, y = combined1.SMA30,
                            line = dict(color = 'green', width=1), 
                            name='MA30', hoverinfo='skip'
                            ))

    fig.add_traces(go.Scatter(x=combined1.Date, y = combined1.SMA90,
                            line = dict(color = 'red', width=1), 
                            name='MA90', hoverinfo='skip'
                            ))
    df2 = combined_df[combined_df['company'] == company]
    comp_group = df2.groupby(by=["date", "sentiment"], as_index=False).agg(
        count_col=pd.NamedAgg(column="sentiment", aggfunc="count"))

    # subplot 1: sentiment volume
    trace1 = go.Bar(
        x = comp_group['date'],
        y = comp_group[comp_group['sentiment'] == 1]['count_col'],
        name='Bullish',
        marker_color='green',
        marker_line_width=0
        )
    trace2 = go.Bar(
        x = comp_group['date'],
        y = comp_group[comp_group['sentiment'] == -1]['count_col'],
        name='Bearish',
        marker_color='red',
        marker_line_width=0
        )
    fig.add_traces([trace1, trace2], rows=3, cols=1)
    fig.update_layout(barmode = 'stack')

    # subplot 2: stock volume
    stock_vol = go.Bar(
        x = combined['Date'],
        y = combined['Volume'],
        name = 'Volume',
        marker_color='blue')
    fig.add_trace(stock_vol, row=4, col=1)

    # Set title
    fig.layout.update(title=f'{company} Stock Price v. Sentiment',
                     showlegend=True, hovermode='closest')

    # Set x-axis title
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=3, col=1)
    fig.update_xaxes(title_text="Date", row=4, col=1)
    fig.update_layout(xaxis_rangeslider_visible=False)
    # Set y-axes titles
    fig.update_yaxes(title_text="Stock Price", secondary_y=True, row=1, col=1)
    fig.update_yaxes(title_text="Stock Sentiment", secondary_y=False, row=1, col=1)
    fig.update_yaxes(title_text="Sentiment Volume", secondary_y=False, row=3, col=1)
    fig.update_yaxes(title_font=dict(size=10), secondary_y=False, row=3, col=1)
    fig.update_yaxes(title_text="Stock Volume", secondary_y=False, row=4, col=1)
    fig.update_yaxes(title_font=dict(size=10), secondary_y=False, row=4, col=1)

    # set y-axes subplots to display only min/max
    # fig.update_layout(yaxis=dict(tickmode='linear', nticks=2, 
    #     range=[min(combined['Volume']), max(combined['Volume'])], row=4, col=1))
    
    fig.update_layout(
    # width=500,
    # height=850,
    showlegend=False,
    hovermode='x unified', 
    template='plotly_white',
    legend=dict(
    x=0,
    y=1.05,
    traceorder="normal",
    font=dict(
        family="sans-serif",
        size=12,
        color="black"
    )))
    fig.update_traces(xaxis='x1')
    return fig

In [160]:
STM

0            NaN
1            NaN
2            NaN
3            NaN
4            NaN
          ...   
3272    0.569843
3273    0.576303
3274    0.582764
3275    0.580069
3276    0.577374
Name: sentiment avg, Length: 3080, dtype: float64

In [7]:
update_line_chart2('META')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock_comp['Date'] = pd.to_datetime(stock_comp['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock_comp['Date'] = stock_comp['Date'].dt.date


In [12]:
import plotly.express as px 
  
# using the iris dataset
df = px.data.iris() 
  
# plotting the line chart
fig = px.line(df, x="species", y="petal_width") 
  
# showing the plot
fig.show()

In [29]:
dff = df[df['Stock Name'] == 'AAPL']
x = dff['Date']
y1 = dff[dff['sentiment'] == 'bullish']['sentiment_count']
y2 = dff[dff['sentiment'] == 'bearish']['sentiment_count']

In [31]:
df.

Unnamed: 0,Date,Stock Name,sentiment,sentiment_count,sentiment_avg,Open,High,Low,Close,Adj Close,Volume
0,2015-01-02,AAPL,bearish,248,404.5,27.847500,27.860001,26.837500,27.332500,24.603210,212818400
1,2015-01-02,AAPL,bullish,561,404.5,27.847500,27.860001,26.837500,27.332500,24.603210,212818400
15,2015-01-05,AAPL,bearish,306,445.5,27.072500,27.162500,26.352501,26.562500,23.910091,257142000
16,2015-01-05,AAPL,bullish,585,445.5,27.072500,27.162500,26.352501,26.562500,23.910091,257142000
32,2015-01-06,AAPL,bearish,251,458.5,26.635000,26.857500,26.157499,26.565001,23.912342,263188400
...,...,...,...,...,...,...,...,...,...,...,...
37143,2022-09-26,AAPL,bullish,8,5.5,149.660004,153.770004,149.639999,150.770004,150.520309,93339400
37162,2022-09-27,AAPL,bearish,1,2.0,152.740005,154.720001,149.949997,151.759995,151.508667,84442700
37163,2022-09-27,AAPL,bullish,3,2.0,152.740005,154.720001,149.949997,151.759995,151.508667,84442700
37179,2022-09-28,AAPL,bearish,37,30.5,147.639999,150.639999,144.839996,149.839996,149.591843,146691400


In [249]:
df_sent

Unnamed: 0,Date,ticker,sentiment,label
0,2014-12-31,BX,0.018023,bearish
1,2014-12-31,COST,0.987811,bullish
2,2014-12-31,CRM,0.998855,bullish
3,2014-12-31,DIS,0.998835,bullish
4,2014-12-31,KO,0.386977,bearish
...,...,...,...,...
33043,2022-09-29,NFLX,0.666051,bullish
33044,2022-09-29,NIO,0.996589,bullish
33045,2022-09-29,PYPL,0.008032,bearish
33046,2022-09-29,TSLA,0.707093,bullish


In [None]:
# def update_line_chart(company, yrs):
# #     if analysis == [] or company is 'All':
# #         return {}, []

#     if company == 'All':
#         # change this when you're done with testing
#         df = globals()['AAPL'][globals()['AAPL'].Date.dt.year.between(yrs[0], yrs[1])]
#     else:
#         df = globals()[company][globals()[company].Date.dt.year.between(yrs[0], yrs[1])]

#     data = df.to_dict("records")

#     fig = make_subplots(
#         rows=3, cols=1,
#         specs=[[{'rowspan': 2}],
#             [None],
#             [{'rowspan': 1}]],
#         vertical_spacing=0.05)

#     stock = go.Scatter(x=df['Date'], y=df['Adj Close'], name="Adj. Close")
#     MA30 = go.Scatter(x=df['Date'], y=df['High'].rolling(window=30).mean(), name="30 day MA")
#     MA50 = go.Scatter(x=df['Date'], y=df['High'].rolling(window=50).mean(), name="50 day MA")
#     sentiment = go.Bar(x=df['Date'], y=df['sentiment'], name="Sentiment", marker=dict(color=df['color'], line=dict(width=0)), showlegend=False)
    
#     fig.append_trace(stock, row=1, col=1)
#     fig.append_trace(MA30, row=1, col=1)
#     fig.append_trace(MA50, row=1, col=1)
#     fig.append_trace(sentiment, row=3, col=1)

#     fig.update_yaxes(title_text='Stock Price', row=1, col=1)
#     fig.update_yaxes(title_text='Sentiment', row=3, col=1)
#     fig.update_yaxes(tickmode='array',
# #                  tickvals=[0, 0.5, 1],
#                  row=3, col=1)

#     fig.layout.update(title=f'{company} Stock Price v. Sentiment',
#                      height=600, width=850, showlegend=True, hovermode='closest')

#     fig.update_layout(
#         template='plotly_white',
#         hovermode='x unified', 
#         legend=dict(
#         x=0,
#         y=1.05,
#         traceorder="normal",
#         font=dict(
#             family="sans-serif",
#             size=12,
#             color="black"
#         )))
#     fig.update_traces(xaxis='x1')
    
#     return fig