# Test for Batch Procedure

In [None]:
import pandas as pd
import logging
from modules import params

# Root logger configuration: INFO level, timestamps with milliseconds, and the
# emitting module / function name in front of every message.
_LOG_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

logging.basicConfig(format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT, level=logging.INFO)

In [None]:
class Batch:
    """Batch pipeline: load scored texts, label them, split by group, aggregate.

    The loaded dataset is kept in ``self.dfs`` under the key ``'-'``; each
    semantic group adds one more entry holding the rows whose
    ``FILTERED_TEXT`` matches that group. ``self.aggregated_dfs`` holds one
    per-``DATE`` summary frame for every entry of ``self.dfs``.

    Args:
        thresholds: Optional mapping with the keys ``'t1'``..``'t4'`` used as
            upper bounds of the sentiment classes. When omitted,
            ``params.SENTIMENT_THRESHOLDS`` is read at classification time.
        groups: Optional mapping whose values are the group patterns to filter
            by (``None`` values are skipped). When omitted,
            ``params.SEMANTIC_GROUPS['GROUPS']`` is read at filtering time.
    """

    def __init__(self, thresholds=None, groups=None):
        self.dfs = {}
        # Created up front so the attribute exists before _aggregate() runs.
        self.aggregated_dfs = {}
        self._thresholds = thresholds
        self._groups = groups

    def _resolve_thresholds(self):
        """Return the explicit thresholds, falling back to the project params."""
        if self._thresholds is not None:
            return self._thresholds
        return params.SENTIMENT_THRESHOLDS

    def _resolve_groups(self):
        """Return the explicit group mapping, falling back to the project params."""
        if self._groups is not None:
            return self._groups
        return params.SEMANTIC_GROUPS['GROUPS']

    def _load_data(self, path):
        """Read the CSV file at *path* into ``self.dfs['-']``."""
        logging.info("Loading CSV file (%s)...", path)
        self.dfs['-'] = pd.read_csv(path)
        logging.info("CSV file loaded succesfully!")

    def _classify_sentiment(self, score):
        """Map a numeric sentiment score to its qualitative label.

        Returns:
            ``None`` for a missing score (``None`` or NaN), otherwise one of
            'Strongly Negative', 'Negative', 'Neutral', 'Positive' or
            'Strongly Positive'. Each threshold is an inclusive upper bound.
        """
        # pandas hands missing values to ``apply`` as NaN. NaN fails every
        # comparison, so without this guard it would reach the final branch
        # and be labelled 'Strongly Positive'.
        if pd.isna(score):
            return None

        thresholds = self._resolve_thresholds()
        # Branches are tested in ascending order, so each one only needs its
        # upper bound: the lower bound is implied by the previous test failing.
        if score <= thresholds['t1']:
            return 'Strongly Negative'
        if score <= thresholds['t2']:
            return 'Negative'
        if score <= thresholds['t3']:
            return 'Neutral'
        if score <= thresholds['t4']:
            return 'Positive'
        return 'Strongly Positive'

    def _add_qualitative_score(self):
        """Add a ``QUALITATIVE_SCORE`` column derived from ``SENTIMENT_SCORE``."""
        logging.info('Extracting qualitative score...')
        data = self.dfs['-']
        data['QUALITATIVE_SCORE'] = data['SENTIMENT_SCORE'].apply(self._classify_sentiment)
        logging.info('Qualitative score extracted succesfully!')

    def _filter_by_group(self):
        """Store, for each configured group, the rows whose text contains it.

        The result for a group is saved in ``self.dfs`` under the group string
        itself. Rows with a missing ``FILTERED_TEXT`` never match.
        """
        full = self.dfs['-']
        texts = full['FILTERED_TEXT']

        for group in self._resolve_groups().values():
            if group is None:
                continue
            logging.info('Filtering data by group (%s)...', group)
            # na=False keeps the mask strictly boolean; a NaN entry in the
            # mask would make the boolean indexing below raise ValueError.
            self.dfs[group] = full[texts.str.contains(group, na=False)]

        logging.info('Data filtered succesfully!')

    def _aggregate(self):
        """Build one per-``DATE`` summary frame for every entry of ``self.dfs``.

        Each summary has the mean score (``Sentiment_Score``), the number of
        non-missing texts (``Count``) and one column per qualitative class
        holding that class's row count for the date.
        """
        self.aggregated_dfs = {}
        for group, df in self.dfs.items():
            logging.info('Aggregating data for group (%s)...', group)
            by_date = df.groupby('DATE')
            average_score = by_date['SENTIMENT_SCORE'].mean().rename('Sentiment_Score')
            total_volume = by_date['FILTERED_TEXT'].count().rename('Count')
            # value_counts yields a (DATE, class) index; unstacking the class
            # level turns every class into its own column.
            class_counts = by_date['QUALITATIVE_SCORE'].value_counts().unstack(level=1)
            self.aggregated_dfs[group] = pd.concat(
                [average_score, total_volume, class_counts], axis=1
            )
        logging.info('Data aggregated succesfully!')

    def run_procedure(self, path_to_data):
        """Run the full pipeline on the CSV file at *path_to_data*.

        Steps: load the file, add the qualitative score, filter by group and
        aggregate. Results are left in ``self.dfs`` and ``self.aggregated_dfs``.
        """
        logging.info('Batch procedure initialized.')
        self._load_data(path_to_data)
        self._add_qualitative_score()
        self._filter_by_group()
        self._aggregate()


In [71]:
# Run the whole procedure on the full dataset; `batch` is kept so that later
# cells can inspect its results.
data_path = 'data/full_data.csv'
batch = Batch()
batch.run_procedure(data_path)

2024-11-21 15:07:21.094 INFO 2455888772 - run_procedure: Batch procedure initialized.
2024-11-21 15:07:21.094 INFO 2455888772 - _load_data: Loading CSV file (data/full_data.csv)...
2024-11-21 15:08:21.920 INFO 2455888772 - _load_data: CSV file loaded succesfully!
2024-11-21 15:08:21.921 INFO 2455888772 - _add_qualitative_score: Extracting qualitative score...
2024-11-21 15:08:30.004 INFO 2455888772 - _add_qualitative_score: Qualitative score extracted succesfully!
2024-11-21 15:08:30.004 INFO 2455888772 - _filter_by_group: Filtering data by group (immigra)...
2024-11-21 15:08:40.602 INFO 2455888772 - _filter_by_group: Filtering data by group (stranier)...
2024-11-21 15:08:49.219 INFO 2455888772 - _filter_by_group: Filtering data by group (clandestin)...
2024-11-21 15:08:57.166 INFO 2455888772 - _filter_by_group: Data filtered succesfully!
2024-11-21 15:08:57.167 INFO 2455888772 - _aggregate: Aggregating data for group (-)...
2024-11-21 15:09:01.176 INFO 2455888772 - _aggregate: Aggrega

In [77]:
# Display the per-DATE aggregate that was computed for the 'stranier' group.
batch.aggregated_dfs['stranier']

Unnamed: 0_level_0,Sentiment_Score,Count,Negative,Neutral,Positive,Strongly Negative,Strongly Positive
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-01,-0.026167,815,316.0,126.0,262.0,98.0,13.0
2018-01-02,-0.027126,1470,688.0,87.0,514.0,152.0,29.0
2018-01-03,-0.016120,1671,820.0,45.0,693.0,99.0,14.0
2018-01-04,-0.024969,1808,929.0,250.0,419.0,175.0,35.0
2018-01-05,-0.029538,1887,942.0,395.0,268.0,244.0,38.0
...,...,...,...,...,...,...,...
2022-12-27,-0.044225,1617,545.0,255.0,333.0,478.0,6.0
2022-12-28,-0.038683,1019,440.0,119.0,185.0,245.0,30.0
2022-12-29,0.006377,3553,1071.0,439.0,1922.0,90.0,31.0
2022-12-30,-0.006724,1019,396.0,145.0,340.0,92.0,46.0
