In [61]:
#  Python imports
from bs4 import BeautifulSoup
import requests
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from random import randint
from time import sleep
import os

In [62]:
def load_results_from_file():
    #  Check if output file exists
    file_name = 'sentiment_results_survey.csv'
    if os.path.exists(file_name):
        df = pd.read_csv(file_name, parse_dates=False)
        return df
    else:
        return None

In [63]:
def retrieve_survey_results(url):
    try:
        #  Create session
        session = requests.Session()
        response = session.get(url)
        session_cookies = session.cookies
        cookies_dictionary = session_cookies.get_dict()
        
        #  Get cookie params
        list_from_dict = []
        for key in cookies_dictionary:
            list_from_dict.append(key)
            list_from_dict.append(cookies_dictionary[key])

        #  Build cookie
        cookie='{}={}; {}={}'.format(list_from_dict[2], list_from_dict[3], list_from_dict[0], list_from_dict[1])
        headers = {
            'Host': 'www.aaii.com',
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0',
            'DNT': '1',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'ru,en-US;q=0.9,en;q=0.8,tr;q=0.7',
            'Cookie': '{}'.format(cookie),
        }
        html_text = requests.get(url, headers=headers,timeout=20).text
        return html_text
    except:
        print('Fetch request failed')
        return None

In [64]:
def process_results(html_text):
    soup = BeautifulSoup(html_text, "html.parser")
    results = soup.find_all('tr',{'align': 'center'})
    results_df = pd.DataFrame()
    for i in range(1, len(results) - 1):
        elements = results[i].find_all('td')
        reported_date =elements[0].getText().strip().replace(':','')

        bullish=elements[1].getText().strip().replace('%','')
        neutral=elements[2].getText().strip().replace('%','')
        bearish=elements[3].getText().strip().replace('%','')
        result_row = pd.DataFrame({"reported_date":[reported_date],"bullish":[bullish],"bearish":[bearish],"neutral":[neutral]})
        results_df = pd.concat([results_df, result_row], axis=0, ignore_index = True)
        
    return results_df 

In [65]:
def plot_results(df):
    fig = make_subplots(rows=1, cols=1)

    #  Plot sentiments
    fig.add_trace(go.Line(x = df.index, y = df['neutral'], line=dict(color="blue", width=1), name="Neutral"), row = 1, col = 1)
    fig.add_trace(go.Line(x = df.index, y = df['bullish'], line=dict(color="green", width=1), name="Bullish"), row = 1, col = 1) 
    fig.add_trace(go.Line(x = df.index, y = df['bearish'], line=dict(color="red", width=1), name="Bearish"), row = 1, col = 1) 
    
    fig.update_layout(
        title={'text':'AAII Sentiment Results', 'x':0.5},
        autosize=False,
        width=800,height=400)
    fig.update_yaxes(range=[0,1000000000],secondary_y=True)
    fig.update_yaxes(visible=False, secondary_y=True)  #hide range slider
    
    fig.show()

In [66]:
#  Try to load from file
results_df = load_results_from_file()
#  Reverse rows
results_df[::-1]
print(results_df)

if results_df is None:
    print('Results file not found - retrieving info remotely')
    #  Load remotely
    url = 'https://www.aaii.com/sentimentsurvey/sent_results'
    html_text = retrieve_survey_results(url)
    print(html_text)
    #  Pass robots prevention
    if "Incapsula incident ID" in html_text:
        print('Blocked by robots prevention - trying again later')
    else:
        #  Process results
        results_df = process_results(html_text)
        print(results_df)
        #  Store file for next time
        results_df.to_csv('sentiment_results_survey.csv')

#  Plot results
if results_df is not None:
    plot_results(results_df)


    Unnamed: 0 reported_date  bullish  bearish  neutral
0           20    December 8     29.7     30.5     39.8
1           19   December 15     25.2     39.3     35.4
2           18   December 22     29.6     33.9     36.6
3           17   December 29     37.7     30.5     31.8
4           16     January 5     32.8     33.3     33.9
5           15    January 12     24.9     38.3     36.8
6           14    January 19     21.0     46.7     32.3
7           13    January 26     23.1     52.9     23.9
8           12    February 2     26.5     43.7     29.9
9           11    February 9     24.4     35.5     40.2
10          10   February 16     19.2     43.2     37.6
11           9   February 23     23.4     53.7     22.9
12           8       March 2     30.4     41.4     28.2
13           7       March 9     24.0     45.8     30.2
14           6      March 16     22.5     49.8     27.8
15           5      March 23     32.8     35.4     31.7
16           4      March 30     31.9     27.5  


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


