# Library

In [80]:
import pandas as pd
import numpy as np
import json
from os import listdir

Save the raw data to csv and get its important features (date, spend, ctr, and cpc)

# ----------------------------------------------Preprocessing----------------------------------------

In [81]:
def json_to_csv(path = 'fb ads/'): #path is the folder of the json file lists
    listfile = listdir(path)

    data_list = []
    #open all the data, and save it to fb_ads_raw for training
    for ctr, i in enumerate(listfile):
        with open(path + i, 'r', encoding='utf8') as data_file:        
            for line in data_file:
                data = json.loads(line)
                #pick the important features
                data_list.append([data["date_start"], data["spend"], data["ctr"], data["cpc"]])
    print ("Files from the folder-%s that contains-%d files are succesfully loaded..." % (path, ctr) )
    data_list = pd.DataFrame(data_list)
    data_list.columns = ["date","spend", "ctr", "cpc"]
    data_list.to_csv("fb_ads_raw.csv", header = True, encoding="utf-8", index=False)

Prepare_data is used to:
- get the data
- add season and day columns to the data
- group the data by date

In [110]:
def prepare_data():
    csv_raw = pd.read_csv("fb_ads_raw.csv")
    csv_raw.date = pd.to_datetime(csv_raw['date'])

    #get detailed season and day_of_week from the date function
    month = csv_raw.date.dt.month
    day = csv_raw.date.dt.dayofweek

    #change months into season format
    conditions = [ (month >= 9), (month >= 6), (month >= 3)]
    choices = [3, 2, 1]
    season = np.select(conditions, choices, default=0)

    #insert the day and the season columns
    csv_raw.insert(1,'day', day)
    csv_raw.insert(0,'season', season)
    grouped = csv_raw.groupby('date').mean()
    return grouped

Get the important features (i.e.):
- spend
- ctr
- season
- day

In [111]:
def important_features(csv_raw):
    important_features = csv_raw.spend.values.reshape(-1,1)
    important_features = np.append(important_features, csv_raw.ctr.values.reshape(-1,1), axis = 1)
    important_features = np.append(important_features, pd.get_dummies(csv_raw.season), axis = 1)
    important_features = np.append(important_features, pd.get_dummies(csv_raw.day), axis = 1)
    important_features = np.append(important_features, csv_raw.cpc.values.reshape(-1,1), axis = 1)
    print (important_features.shape)
    return important_features

Get the features and the label data

In [114]:
def preprocess_data():
    csv_raw = prepare_data()   
    
    #normalize the ctr and spend (make it into range 0 - 1)
    csv_raw.spend = (csv_raw.spend-csv_raw.spend.min())/(csv_raw.spend.max()-csv_raw.spend.min())
    csv_raw.ctr = (csv_raw.ctr-csv_raw.ctr.min())/(csv_raw.ctr.max()-csv_raw.ctr.min())
    
    #get  the features, and return them (spend, ctr, season, and day)
    features = pd.DataFrame( important_features(csv_raw) )
    
    #save data to csv
    features.to_csv("fb_ads_preprocess.csv",index=False, header=False, encoding='utf-8')
    print ("Preprocessing is successful!")

# ---------------------------------------------Main Function------------------------------------------

In [117]:
def get_data_preprocessing(path = 'fb ads/'):
    json_to_csv(path)
    preprocess_data()
#get_data_preprocessing()

Files from the folder-fb ads/ that contains-22 files are succesfully loaded...
(510, 5)
(510, 14)
Preprocessing is successful!
