In [1]:
from datetime import datetime, timedelta
import pandas as pd
import typing
import numpy as np

import math

## Feature Extraction (Primary + Secondary)

In [2]:
#   Interior bin edges of the 8 equal-width polarity classes on [-1, 1].
#   Class k covers [-1 + 0.25*k, -1 + 0.25*(k + 1)); class 7 also includes +1.
_POLARITY_CLASS_EDGES = (-0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 0.75)


def _polarity_score(positive, negative):
    """Return (positive - negative) / (positive + negative) as a float Series.

    Rows whose denominator is zero yield NaN instead of raising or producing inf.
    """
    total = positive + negative
    return (positive - negative) / total.where(total != 0)


def _polarity_class(polarity):
    """Bin polarity values into classes 0-7; NaN or out-of-range values get -1."""
    #   digitize returns i such that edges[i-1] <= x < edges[i], which matches
    #   the left-closed bins; values outside [-1, 1] (and NaN) are masked to -1.
    classes = np.digitize(polarity.to_numpy(dtype=float), _POLARITY_CLASS_EDGES)
    in_range = polarity.between(-1, 1).to_numpy()
    return pd.Series(np.where(in_range, classes, -1), index=polarity.index)


def extract_features(csv_path: str, time_shift_hours: float = 5):
    """Read a post export, add derived feature columns, and save a new CSV.

    The input CSV must contain the columns "Post Created Date" (dd-mm-yy),
    "Post Created Time" (HH:MM:SS), "Love", "Care", "Wow", "Haha", "Angry",
    "Sad", "comment_pos" and "comment_neg".

    Columns written (in this order):
        timestamp         creation date/time shifted by ``time_shift_hours``
        week_day          lower-cased weekday name of ``timestamp``
        time_slot         hour of ``timestamp`` plus one (1-24)
        polarity          (pos - neg) / (pos + neg) over reactions, where
                          pos = Love + Care + Wow and neg = Haha + Angry + Sad
        polarity_class    ``polarity`` binned into 8 classes (0-7), -1 if undefined
        polarity_comment  (comment_pos - comment_neg) / (comment_pos + comment_neg)

    A polarity whose denominator is zero is stored as NaN; the matching
    ``polarity_class`` stays -1 and an error line is printed for that row.

    Args:
        csv_path: path of the CSV to read. The result is written next to it
            with "_new" inserted before the ".csv" suffix.
        time_shift_hours: hours added to the exported creation time.

    Raises:
        ValueError: if a date/time pair does not match the expected format.
        KeyError: if a required column is missing.
    """
    df = pd.read_csv(csv_path)

    #   primary features
    #   astype(str) makes a missing date/time fail parsing loudly instead of
    #   silently turning into NaT.
    created = df["Post Created Date"].astype(str) + "::" + df["Post Created Time"].astype(str)
    parsed = pd.to_datetime(created, format="%d-%m-%y::%H:%M:%S")

    #   CrowdTangle labels its exported time as UTC, but a cross-check with
    #   Facebook suggested it is really UTC+1. With the default 5-hour shift the
    #   result is UTC+6 -- presumably the pages' local time (TODO confirm).
    timestamp = parsed + timedelta(hours=time_shift_hours)

    df["timestamp"] = timestamp
    df["week_day"] = timestamp.dt.strftime("%A").str.lower()
    df["time_slot"] = timestamp.dt.hour + 1

    #   experimental features
    react_pos = df["Love"] + df["Care"] + df["Wow"]
    react_neg = df["Haha"] + df["Angry"] + df["Sad"]

    df["polarity"] = _polarity_score(react_pos, react_neg)
    df["polarity_class"] = _polarity_class(df["polarity"])
    df["polarity_comment"] = _polarity_score(df["comment_pos"], df["comment_neg"])

    #   report every row whose polarity could not be classified
    for i in df.index[df["polarity_class"] == -1]:
        print("\n>> ERROR at", i, "polarity class =", -1)

    print("New features extracted!")

    #   strip only a trailing ".csv" so directory names containing ".csv"
    #   are left intact
    base_path = csv_path[:-len(".csv")] if csv_path.endswith(".csv") else csv_path
    new_csv_path = f"{base_path}_new.csv"

    #   explicit date_format keeps the full "date time" text even when every
    #   timestamp happens to fall on midnight
    df.to_csv(new_csv_path, index=False, date_format="%Y-%m-%d %H:%M:%S")

    print(f"New CSV saved as {new_csv_path}")

## Extraction on all CSV files

In [3]:
#   driver: run the feature extraction once for every page's exported CSV
csv_list = [
    "../khudalagse/final.csv",
    "../metroman/final.csv",
    "../petuk-couple/final.csv",
    "../rafsan/final.csv",
    "../zoltanbd/final.csv",
]

#   each call reads one export and writes "<name>_new.csv" beside it
for each_csv in csv_list:
    print(f"\nCurrently in csv: {each_csv}")
    extract_features(each_csv)

print("\nAll features extracted!", end="\n\n")


Currently in csv: ../khudalagse/final.csv
New features extracted!
New CSV saved as ../khudalagse/final_new.csv

Currently in csv: ../metroman/final.csv
New features extracted!
New CSV saved as ../metroman/final_new.csv

Currently in csv: ../petuk-couple/final.csv
New features extracted!
New CSV saved as ../petuk-couple/final_new.csv

Currently in csv: ../rafsan/final.csv
New features extracted!
New CSV saved as ../rafsan/final_new.csv

Currently in csv: ../zoltanbd/final.csv
New features extracted!
New CSV saved as ../zoltanbd/final_new.csv

All features extracted!

