# Get actor characteristics
In this notebook, we add the actor demographics (age, sex, race, and ethnicity) to the data files that contain the mid-term features.

## Load modules

In [None]:
from pyAudioAnalysis import ShortTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO 
from pyAudioAnalysis import MidTermFeatures as mF
import numpy as np 
import plotly.graph_objs as go 
import plotly
import IPython

import pandas as pd

import os, shutil
import time

import matplotlib.pyplot as plt

import random

## Load data

In [None]:
# Read the CSV of actor ids and their characteristics into a DataFrame
actor_data = pd.read_csv(filepath_or_buffer='../VideoDemographics.csv')

In [None]:
# Read the mid-term feature tables: training set first, then test set
mid_data_train, mid_data_test = map(
    pd.read_csv,
    (
        'Data/Mid_features/midFeaturesTrainSetFinal2.csv',
        'Data/Mid_features/midFeaturesTestSetFinal2.csv',
    ),
)

## Extract demographics
Use the DataFrame `actor_data` to build dicts mapping each `ActorID` to the actor's demographics.

In [None]:
# Build one lookup dict per demographic attribute, keyed by ActorID.
# Zipping the ActorID column with each attribute column replaces the
# row-by-row `.at` loop and leaves no loop scratch variables behind.
# If an ActorID occurs in more than one row, the last row wins, which is
# the same outcome as repeated dict assignment inside a loop.
actor_ids = actor_data['ActorID']

age_dict = dict(zip(actor_ids, actor_data['Age']))
sex_dict = dict(zip(actor_ids, actor_data['Sex']))
race_dict = dict(zip(actor_ids, actor_data['Race']))
ethnicity_dict = dict(zip(actor_ids, actor_data['Ethnicity']))

To ensure that we are adding the correct demographics to each row of `mid_data_train` and `mid_data_test`, we build lists of demographics in the same order that actors appear in those dataframes.

For example, if the first three ActorIDs in `mid_data_train` are `1001`, `1005`, `1008`, the lists have the demographics for those actors in the first three entries.

In [None]:
def _demographics_in_row_order(frame, lookup):
    """Return the `lookup` value for every row of `frame`, in row order.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with an 'ActorID' column.
    lookup : dict
        Maps an ActorID to one demographic attribute of that actor.

    Returns
    -------
    list
        One entry per row of `frame`; entry k belongs to the actor in row k.

    Raises
    ------
    KeyError
        If an ActorID in `frame` has no entry in `lookup`. This is left to
        propagate so that a mismatch between the feature files and the
        demographics file is noticed instead of producing missing values.
    """
    return [lookup[actor_id] for actor_id in frame['ActorID']]


# Get lists of characteristics in the order actors appear in mid_data_train:
age_list_train = _demographics_in_row_order(mid_data_train, age_dict)
sex_list_train = _demographics_in_row_order(mid_data_train, sex_dict)
race_list_train = _demographics_in_row_order(mid_data_train, race_dict)
ethnicity_list_train = _demographics_in_row_order(mid_data_train, ethnicity_dict)

# Get lists of characteristics in the order actors appear in mid_data_test:
age_list_test = _demographics_in_row_order(mid_data_test, age_dict)
sex_list_test = _demographics_in_row_order(mid_data_test, sex_dict)
race_list_test = _demographics_in_row_order(mid_data_test, race_dict)
ethnicity_list_test = _demographics_in_row_order(mid_data_test, ethnicity_dict)

## Add demographics

In [None]:
# Add the characteristics columns to mid_data_train at column positions 4-7.
# DataFrame.insert raises ValueError when a column of the same name already
# exists, so copies left over from an earlier run of this cell are dropped
# first. That keeps the cell safe to re-run without reloading the data.
train_demographics = {
    'Age': age_list_train,
    'Sex': sex_list_train,
    'Race': race_list_train,
    'Ethnicity': ethnicity_list_train,
}
mid_data_train = mid_data_train.drop(columns=list(train_demographics), errors='ignore')
for position, (column, values) in enumerate(train_demographics.items(), start=4):
    mid_data_train.insert(position, column, values)

In [None]:
# Show the first five rows of the training frame
mid_data_train.head(n=5)

In [None]:
# Add the characteristics columns to mid_data_test at column positions 4-7.
# DataFrame.insert raises ValueError when a column of the same name already
# exists, so copies left over from an earlier run of this cell are dropped
# first. That keeps the cell safe to re-run without reloading the data.
test_demographics = {
    'Age': age_list_test,
    'Sex': sex_list_test,
    'Race': race_list_test,
    'Ethnicity': ethnicity_list_test,
}
mid_data_test = mid_data_test.drop(columns=list(test_demographics), errors='ignore')
for position, (column, values) in enumerate(test_demographics.items(), start=4):
    mid_data_test.insert(position, column, values)

In [None]:
# Show the first five rows of the test frame
mid_data_test.head(n=5)

## Save files

In [None]:
# Write both augmented frames to CSV without the index column,
# training set first, then test set
for frame, destination in (
    (mid_data_train, 'midFeaturesTrainSetWithChars.csv'),
    (mid_data_test, 'midFeaturesTestSetWithChars.csv'),
):
    frame.to_csv(destination, index=False)