In [82]:
## Sentiment Analysis - Regenerative Agriculture

In [10]:
# Helper functions: fetch data from the Pushshift API and format the results
import requests
def get_pushshift_data(data_type, **kwargs):
    """
    Query the Pushshift Reddit search API and return the decoded JSON body.

    Parameters
    ----------
    data_type : str
        Endpoint to search: 'comment' or 'submission'.
    **kwargs
        Sent unchanged as URL query parameters (e.g. q, after, size, sort).
        Read more: https://github.com/pushshift/api

    Returns
    -------
    The parsed JSON payload, exactly as produced by ``Response.json()``.

    Raises
    ------
    requests.exceptions.HTTPError
        When the API answers with a 4xx/5xx status code.
    requests.exceptions.Timeout
        When the server does not answer within the timeout below.
    """
    base_url = f"https://api.pushshift.io/reddit/search/{data_type}/"
    # requests waits forever by default; a timeout keeps a stalled API from
    # hanging the notebook cell or Dash callback that triggered the call.
    response = requests.get(base_url, params=kwargs, timeout=30)
    # Fail here with a clear HTTP error rather than with a JSON-decoding or
    # missing-key error further down the pipeline.
    response.raise_for_status()
    return response.json()


def polarity_color(num):
    """Return the marker colour for a polarity score.

    Exactly zero maps to 'white', anything greater than zero to 'blue',
    and every other value to 'red'.
    """
    if num == 0.0:
        return 'white'
    return 'blue' if num > 0.0 else 'red'
    
# function for making clickable links in df table
def make_clickable(val):
    """Wrap ``val`` in an HTML anchor tag whose visible text is 'Link'."""
    anchor_template = '<a href="{}">Link</a>'
    return anchor_template.format(val)

In [17]:
import dash
# External CSS (hosted on CodePen) handed to Dash for page styling.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# The single Dash application object that every callback below registers on.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# Keep a module-level handle on the app's underlying server object
# (presumably for a WSGI runner — confirm against the deployment setup).
server = app.server
# Silence Dash's callback-validation exceptions; the callbacks in this notebook
# are registered in cells that appear before app.layout is assigned.
app.config.suppress_callback_exceptions = True


In [15]:
import re
from datetime import date
from io import StringIO

import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import textblob
from dash.dependencies import Input, Output

##########################

### data.py will give us a pandas DataFrame which has all the data required for the charts.

############################

def get_reddit_data(days,query,dataType):
    """
    Fetch Reddit items from Pushshift and build a sentiment-scored DataFrame.

    Parameters
    ----------
    days : str
        Forwarded as the Pushshift ``after`` parameter (e.g. "7d").
    query : str
        Search term, forwarded as ``q``.
    dataType : str
        'comment' or 'submission'; selects the Pushshift endpoint.
        NOTE(review): the columns selected below include "body" — confirm
        that 'submission' results actually carry that field.

    Returns
    -------
    pandas.DataFrame or str
        A frame sorted and indexed by ``date`` with the columns author, body,
        created_utc, score, permalink, sentiment_polarity,
        sentiment_subjectivity, sentiment, preview, date, mean, rolling,
        scaled_polarity and polarity_color. When the API returns no records
        the string 'No data available for the set dates' is returned instead.

    Raises
    ------
    KeyError
        If the returned records lack one of the selected columns.
    """
    response = get_pushshift_data(data_type=dataType,
                                  q=query,
                                  after=days,
                                  size=1000,
                                  sort_type="score",
                                  sort="desc")
    # A payload without a "data" key yields None; treat that like an empty
    # result instead of crashing on len(None).
    records = response.get("data") or []
    if not records:
        return ('No data available for the set dates')

    # Keep only the fields the dashboard uses.
    columns_of_interest = ["author", "body", "created_utc", "score", "permalink"]
    df = pd.DataFrame.from_records(records)[columns_of_interest].copy()

    # Run TextBlob once per body and reuse the result for both scores.
    sentiments = [textblob.TextBlob(text).sentiment for text in df["body"]]
    df["sentiment_polarity"] = [s.polarity for s in sentiments]
    df["sentiment_subjectivity"] = [s.subjectivity for s in sentiments]

    # Zero polarity is deliberately counted as "positive" (>= 0).
    df["sentiment"] = df["sentiment_polarity"].map(
        lambda polarity: "positive" if polarity >= 0 else "negative")

    # Short text preview: the first 50 characters of the body.
    df["preview"] = df["body"].str[0:50]

    # created_utc holds epoch seconds; turn it into a datetime column.
    df["date"] = pd.to_datetime(df['created_utc'], unit='s')

    # The time-based rolling window below needs a sorted datetime index.
    df.sort_values(by='date', inplace=True)
    df.index = pd.to_datetime(df['date'])
    df['mean'] = df['sentiment_polarity'].expanding().mean()
    df['rolling'] = df['sentiment_polarity'].rolling('4h').mean()
    df['scaled_polarity'] = (1000 * df['sentiment_polarity']).abs()
    df['polarity_color'] = df['sentiment_polarity'].map(polarity_color)

    # Prefix the site so each permalink is a complete URL.
    df['permalink'] = "https://reddit.com" + df['permalink'].astype(str)

    # The former df.style.format({'permalink': make_clickable}) call was
    # dropped: it built a Styler that was immediately discarded and never
    # affected the returned frame. Apply make_clickable where the table is
    # rendered if clickable links are wanted.
    return df

    

# Module-level sample pull: comments matching "regenerative agriculture" with
# after="7d". get_reddit_data returns a message string instead of a DataFrame
# when the API has no data, so df1 may be a str.
df1=get_reddit_data("7d","regenerative agriculture","comment")
#df1


In [20]:
import requests

##############################
#Callbacks and data sharing
#We add a new callback to get data.
#Data callback
###################################

# import get_data function from data.py

#from data import get_coin_data 

@app.callback(Output('intermediate-value', 'children'),
              [Input('dropdown', 'value')])
def get_data(option, query="regenerative agriculture", dataType="comment"):
    """
    Dash callback: fetch Reddit data for the selected time window.

    The previous version of this cell pasted copies of the module-level helpers
    and of get_reddit_data's body into the callback, which left the cell
    syntactically invalid and referenced an undefined ``days`` variable. It now
    delegates to get_reddit_data.

    Parameters
    ----------
    option : str
        Value of the 'dropdown' component (e.g. "7d"); used as the look-back
        window instead of the formerly hard-coded "7d".
    query : str
        Search term. Defaulted because the callback has a single Input, so
        Dash calls it with ``option`` only.
    dataType : str
        'comment' or 'submission'; defaulted for the same reason.

    Returns
    -------
    str
        The DataFrame serialised with ``to_json(date_format='iso',
        orient='split')`` for storage in 'intermediate-value'.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When nothing is selected or no data is available, so dependent
        callbacks are not fed an unusable value.

    NOTE(review): a later cell registers another callback on this same Output;
    Dash normally rejects duplicate outputs, so keep only one of them.
    """
    if not option:
        raise dash.exceptions.PreventUpdate

    df = get_reddit_data(option, query, dataType)
    # get_reddit_data returns a plain message string when there is no data.
    if not isinstance(df, pd.DataFrame):
        raise dash.exceptions.PreventUpdate

    return df.to_json(date_format='iso', orient='split')


In [19]:
@app.callback(Output('table-output', 'children'), 
              [Input('intermediate-value', 'children')])
def get_data_table(data):
    """
    Dash callback: render the shared JSON payload as a DataTable.

    Parameters
    ----------
    data : str
        JSON text in 'split' orientation read from 'intermediate-value'.

    Returns
    -------
    dash_table.DataTable
        A table with one column per DataFrame column, fixed headers and a
        vertically scrolling body.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When the payload is empty or None (nothing has been stored yet).
    """
    if not data:
        raise dash.exceptions.PreventUpdate

    # Wrap the text in a buffer: newer pandas deprecates passing literal JSON
    # strings to read_json, while file-like input works across versions.
    df = pd.read_json(StringIO(data), orient='split')
    df['date'] = pd.to_datetime(df['date'])
    data_table = dash_table.DataTable(
        id='datatable-data',
        data=df.to_dict('records'),
        columns=[{'id': c, 'name': c} for c in df.columns],
        style_table={'overflowY': 'scroll'},
        fixed_rows={'headers': True, 'data': 10},
        style_cell={'width': '100px'},
        style_header={
            'backgroundColor': 'rgb(230, 230, 230)',
            'fontWeight': 'bold'
        }
    )
    return data_table



In [21]:
@app.callback(Output('graph-output', 'children'),
              [Input('intermediate-value', 'children')])
def render_content(data):
    """
    Dash callback: plot sentiment polarity and its rolling mean over time.

    Parameters
    ----------
    data : str
        JSON text in 'split' orientation read from 'intermediate-value'.

    Returns
    -------
    html.Div
        A container holding one dcc.Graph with id 'SMA'.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When the payload is empty or None (nothing has been stored yet).

    NOTE(review): x_axis, y_axis and colors are read as module-level names but
    are not defined in the cells visible here — confirm they exist before this
    callback can fire.
    """
    if not data:
        raise dash.exceptions.PreventUpdate

    # Wrap the text in a buffer: newer pandas deprecates passing literal JSON
    # strings to read_json, while file-like input works across versions.
    df = pd.read_json(StringIO(data), orient='split')

    polarity_trace = {'x': df['date'], 'y': df['sentiment_polarity'],
                      'type': 'scatter', 'name': 'sentiment_polarity'}
    # 'secondary_y' is an argument of make_subplots figures, not a trace
    # attribute, so it was silently ignored. Binding the trace to 'y2' (defined
    # in the layout below) gives the intended secondary axis.
    rolling_trace = {'x': df['date'], 'y': df['rolling'],
                     'type': 'scatter', 'name': 'rolling', 'yaxis': 'y2'}

    figure = {
        'data': [polarity_trace, rolling_trace],
        'layout': {
            'title': 'Simple Rolling Average',
            'height': 700,
            'xaxis': x_axis,
            'yaxis': y_axis,
            # Secondary axis drawn over the primary one, ticks on the right.
            'yaxis2': {'overlaying': 'y', 'side': 'right'},
            'plot_bgcolor': colors['background2'],
            'paper_bgcolor': colors['background'],
            'font': {
                'color': colors['text'],
                'size': 18
            }
        }
    }

    return html.Div([
        html.H3(dcc.Graph(id='SMA', figure=figure)),
    ])


In [22]:
# Page layout: title, look-back selector, hidden data store, graph, table, footer.
app.layout = html.Div(
    [
        html.H1(
            "Dashboard",
            style={"textAlign": "center",
                   "color": "white",
                   # "background": "yellow"
                   },
        ),
        html.Div(
            ["Select days here",
             # Look-back window; its value is the Input of the data callback.
             dcc.Dropdown(
                    id="dropdown",
                    options=[{"label": i, "value": i} for i in ["7d", "28d"]],
                    value="7d",
                    optionHeight=10,
                    style={
                        "height": "50px",
                        "font-weight": 100,
                        "font-size": "16px",
                        "line-height": "10px",
                        "textAlign": "center",
                        "color": "gray",
                        "margin": 0,
                        "padding": "8px",
                        "background": "yellow",
                        "position": "middle",
                        "display": "inline-block",
                        "width": "150px",
                        "vertical-align": "middle",
                    },
                ),
            ],
            className="selectors",
            style={
                "marginTop": 0,
                "marginBottom": 0,
                "font-size": 30,
                "color": "white",
                "display": "inline-block",
            },
        ),

        # Hidden container for the JSON data shared between callbacks. The
        # callbacks read and write 'intermediate-value', but no component with
        # that id existed in the layout, so the callback chain never ran.
        html.Div(id="intermediate-value", style={"display": "none"}),

        # Filled by the graph callback.
        html.Div(id="graph-output", className="graph-display"),
        html.Div(
            children=[
                html.H1(
                    children="Data Table",  # html for table
                    style={"textAlign": "center", "background": "yellow"},
                )
            ]
        ),
        # 'table-output' is filled by the table callback.
        html.Div(className="data-table",
            children=[html.Table(id="table"), html.Div(id="table-output")]),
        html.Div(
            children=[
                dcc.Markdown(  # markdown
                    " © 2019 [DCAICHARA](https://github.com/dc-aichara)  All Rights Reserved."
                )
            ],
            style={"textAlign": "center", "background": "yellow"},
        ),
    ],
    style={"background": "#000080"},
)




@app.callback(Output('table-output', 'children'), 
              [Input('intermediate-value', 'children')])
def get_data_table(data):
    """
    Dash callback: render the shared JSON payload as a DataTable.

    Parameters
    ----------
    data : str
        JSON text in 'split' orientation read from 'intermediate-value'.

    Returns
    -------
    dash_table.DataTable
        A table with one column per DataFrame column, fixed headers and a
        vertically scrolling body.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When the payload is empty or None (nothing has been stored yet).

    NOTE(review): an earlier cell registers a callback on this same Output;
    Dash normally rejects duplicate outputs, so keep only one of them.
    """
    if not data:
        raise dash.exceptions.PreventUpdate

    # Wrap the text in a buffer: newer pandas deprecates passing literal JSON
    # strings to read_json, while file-like input works across versions.
    df = pd.read_json(StringIO(data), orient='split')
    df['date'] = pd.to_datetime(df['date'])
    data_table = dash_table.DataTable(
        id='datatable-data',
        data=df.to_dict('records'),
        columns=[{'id': c, 'name': c} for c in df.columns],
        style_table={'overflowY': 'scroll'},
        fixed_rows={'headers': True, 'data': 10},
        style_cell={'width': '100px'},
        style_header={
            'backgroundColor': 'rgb(230, 230, 230)',
            'fontWeight': 'bold'
        }
    )
    return data_table


@app.callback(Output("intermediate-value", "children"),
              [Input("dropdown", "value")])
def get_data(option):
    """
    Dash callback: fetch Reddit comments for the selected time window.

    The body previously called get_coin_data, which is not defined in this
    notebook (its import is commented out), so it raised NameError on every
    invocation. It now uses this notebook's own get_reddit_data.

    Parameters
    ----------
    option : str
        Value of the 'dropdown' component (e.g. "7d"), used as the look-back
        window.

    Returns
    -------
    str
        The DataFrame serialised with ``to_json(date_format="iso",
        orient="split")`` for storage in 'intermediate-value'.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When nothing is selected or no data is available.

    NOTE(review): an earlier cell registers a callback on this same Output;
    Dash normally rejects duplicate outputs, so keep only one of them.
    """
    if not option:
        raise dash.exceptions.PreventUpdate

    df = get_reddit_data(option, "regenerative agriculture", "comment")
    # get_reddit_data returns a plain message string when there is no data.
    if not isinstance(df, pd.DataFrame):
        raise dash.exceptions.PreventUpdate

    return df.to_json(date_format="iso", orient="split")



@app.callback(Output('graph-output', 'children'),
              [Input('intermediate-value', 'children')])
def render_content(data):
    """
    Dash callback: plot sentiment polarity and its rolling mean over time.

    Parameters
    ----------
    data : str
        JSON text in 'split' orientation read from 'intermediate-value'.

    Returns
    -------
    html.Div
        A container holding one dcc.Graph with id 'SMA'.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When the payload is empty or None (nothing has been stored yet).

    NOTE(review): x_axis, y_axis and colors are read as module-level names but
    are not defined in the cells visible here — confirm they exist before this
    callback can fire. An earlier cell also registers a callback on this same
    Output; Dash normally rejects duplicate outputs, so keep only one.
    """
    if not data:
        raise dash.exceptions.PreventUpdate

    # Wrap the text in a buffer: newer pandas deprecates passing literal JSON
    # strings to read_json, while file-like input works across versions.
    df = pd.read_json(StringIO(data), orient='split')

    polarity_trace = {'x': df['date'], 'y': df['sentiment_polarity'],
                      'type': 'scatter', 'name': 'sentiment_polarity'}
    # 'secondary_y' is an argument of make_subplots figures, not a trace
    # attribute, so it was silently ignored. Binding the trace to 'y2' (defined
    # in the layout below) gives the intended secondary axis.
    rolling_trace = {'x': df['date'], 'y': df['rolling'],
                     'type': 'scatter', 'name': 'rolling', 'yaxis': 'y2'}

    figure = {
        'data': [polarity_trace, rolling_trace],
        'layout': {
            'title': 'Simple Rolling Average',
            'height': 700,
            'xaxis': x_axis,
            'yaxis': y_axis,
            # Secondary axis drawn over the primary one, ticks on the right.
            'yaxis2': {'overlaying': 'y', 'side': 'right'},
            'plot_bgcolor': colors['background2'],
            'paper_bgcolor': colors['background'],
            'font': {
                'color': colors['text'],
                'size': 18
            }
        }
    }

    return html.Div([
        html.H3(dcc.Graph(id='SMA', figure=figure)),
    ])



# Start the Dash server only when this code runs as the main module (which
# includes a notebook cell), not when it is imported. Debug mode is off.
if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [01/Feb/2021 19:19:52] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [01/Feb/2021 19:19:53] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [01/Feb/2021 19:19:53] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [01/Feb/2021 19:19:53] "[37mGET /_dash-component-suites/dash_core_components/async-markdown.v1_15_0m1611086576.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [01/Feb/2021 19:19:53] "[37mGET /_dash-component-suites/dash_core_components/async-highlight.v1_15_0m1611086576.js HTTP/1.1[0m" 200 -


In [None]:
import plotly.graph_objects as go
import pandas as pd

# Standalone Plotly view of the sample data pulled into df1 above.
# The cell used to read a module-level `df`, which is never assigned in this
# notebook (only df1 is), so it failed with NameError on a fresh kernel.
# get_reddit_data returns a message string when there is no data; stop with
# that message rather than failing on column access.
assert isinstance(df1, pd.DataFrame), df1

fig = go.Figure()

# One marker per comment: colour encodes the sign of the polarity, marker area
# encodes its magnitude, and the hover text shows the comment preview.
fig.add_trace(go.Scatter(x=df1['date'],
                         y=df1['sentiment_polarity'],
                         mode='markers',
                         marker=dict(
                             color=df1['polarity_color'],
                             size=df1['scaled_polarity'],
                             sizemode='area',
                             sizemin=2),
                         text=df1['preview'],
                         showlegend=False))

# Overlay the 4-hour rolling mean and the expanding (cumulative) mean.
fig.add_trace(go.Scatter(x=df1['date'], y=df1['rolling'],
                         line=dict(color='green', width=6, dash='dot'),
                         name='Rolling Mean'))
fig.add_trace(go.Scatter(x=df1['date'], y=df1['mean'],
                         line=dict(color='yellow', width=6, dash='dot'),
                         name='Expanding Mean'))

# The title now matches the data actually plotted (df1 is the 7-day
# "regenerative agriculture" comment query), not the tutorial's
# "World News ... 48 hours" wording.
fig.update_layout(title='Comment Sentiment for "regenerative agriculture" over the past 7 days')
fig.update_xaxes(title='Date').update_yaxes(title='Comment Polarity')

#fig.update_layout(showlegend=True)

fig.show()