In [None]:
import os
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError
from tqdm import tqdm

import data_reader
import results_analyser
# Register tqdm with pandas; this provides the `progress_apply` method used in later cells.
tqdm.pandas()

# Base endpoint of the Sentic polarity API; the text to classify is appended after `?text=`.
# NOTE(review): the `WL4p0lspQV18` path segment looks like a personal API key - confirm, and
# if so consider loading it from an environment variable instead of committing it.
polarity_classification_url = "https://sentic.net/api/en/WL4p0lspQV18.py?text="

In [None]:
def polarity_processor(row, timeout=30):
    """Query the Sentic polarity API for one row's cleaned text.

    Args:
        row: Mapping-like row (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
            whose ``'text_cleaned'`` entry holds the text to classify.
        timeout: Seconds to wait for the API before giving up, so a single
            stalled request cannot hang the whole ``progress_apply`` pass.

    Returns:
        The response body with surrounding whitespace removed, or ``None`` when
        the text is not a string or the request fails. Failures are printed
        rather than raised so one bad row does not abort the run.
    """
    text = row['text_cleaned']
    if not isinstance(text, str):
        # Missing values (e.g. NaN) cannot be concatenated into the URL.
        print(f'Other error occurred: text_cleaned is not a string: {text!r}')
        return None

    # Percent-encode the text so characters such as '&', '#', '+' or '%' are
    # sent as part of the text instead of being read as URL syntax.
    url = polarity_classification_url + quote(text, safe='')
    try:
        response = requests.get(url, timeout=timeout)
        # Without this call a 4xx/5xx reply never raises HTTPError and its
        # error page would be parsed as if it were a polarity label.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # strip() removes the trailing newline without cutting a real
        # character when the body has no trailing newline.
        return soup.text.strip()
    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        print(f'Other error occurred: {err}')
    return None

## Data 1

In [None]:
# Load the first dataset and display it.
# NOTE(review): presumably a pandas DataFrame with 'text_cleaned' and 'Label' columns
# (both are used in later cells) - confirm against data_reader.read_data1.
data1 = data_reader.read_data1("dataframe")
data1

In [None]:
# One API call per row, with a tqdm progress bar; stores the raw label returned for each text.
data1['sentic_polarity'] = data1.progress_apply(polarity_processor, axis=1)

In [None]:
# Encode the API label as a binary prediction: 1 for "POSITIVE", 0 for anything else
# (including None left by failed requests). An already-encoded 1 is also accepted so that
# re-running this cell keeps the column intact instead of resetting every row to 0.
data1['sentic_polarity'] = [
    1 if polar == "POSITIVE" or polar == 1 else 0
    for polar in data1['sentic_polarity']
]

## Data 2

In [None]:
# Load the second dataset and display it.
# NOTE(review): presumably a pandas DataFrame with 'text_cleaned' and 'Label' columns
# (both are used in later cells) - confirm against data_reader.read_data2.
data2 = data_reader.read_data2("dataframe")
data2

In [None]:
# One API call per row, with a tqdm progress bar; stores the raw label returned for each text.
data2['sentic_polarity'] = data2.progress_apply(polarity_processor, axis=1)

In [None]:
# Encode the API label as a binary prediction: 1 for "POSITIVE", 0 for anything else
# (including None left by failed requests). An already-encoded 1 is also accepted so that
# re-running this cell keeps the column intact instead of resetting every row to 0.
data2['sentic_polarity'] = [
    1 if polar == "POSITIVE" or polar == 1 else 0
    for polar in data2['sentic_polarity']
]

## Data 3

In [None]:
# Load the third dataset and display it.
# NOTE(review): presumably a pandas DataFrame with 'text_cleaned' and 'Label' columns
# (both are used in later cells) - confirm against data_reader.read_data3.
data3 = data_reader.read_data3("dataframe")
data3

In [None]:
# One API call per row, with a tqdm progress bar; stores the raw label returned for each text.
data3['sentic_polarity'] = data3.progress_apply(polarity_processor, axis=1)

In [None]:
# Encode the API label as a binary prediction: 1 for "POSITIVE", 0 for anything else
# (including None left by failed requests). An already-encoded 1 is also accepted so that
# re-running this cell keeps the column intact instead of resetting every row to 0.
data3['sentic_polarity'] = [
    1 if polar == "POSITIVE" or polar == 1 else 0
    for polar in data3['sentic_polarity']
]

## Overall Metrics

In [None]:
# Accumulate one set of metrics per dataset, comparing the gold 'Label' column with the
# binarised Sentic prediction, then display the combined table.
results_df = pd.DataFrame()
for dataset_name, frame in (("data1", data1), ("data2", data2), ("data3", data3)):
    results_df = results_analyser.calculate_metrics(
        results_df, frame['Label'], frame['sentic_polarity'], dataset_name
    )
results_df

In [None]:
# Count how many rows of data1 carry each distinct 'sentic_polarity' value.
data1['sentic_polarity'].value_counts()

In [None]:
# Persist each dataset together with its predictions. `to_csv` does not create missing
# parent directories, so make sure the output folder exists first; otherwise a fresh
# checkout would fail here, after the slow API pass has already been run.
os.makedirs("./results", exist_ok=True)
data1.to_csv("./results/data1_baseline.csv",index=False)
data2.to_csv("./results/data2_baseline.csv",index=False)
data3.to_csv("./results/data3_baseline.csv",index=False)