In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import math
import warnings
# Install a blanket 'ignore' filter: every warning category is silenced from this
# point on, not just a specific noisy one.
# NOTE(review): this also hides deprecation/numerical warnings raised by the
# libraries imported above — consider narrowing the filter to a category or module.
warnings.filterwarnings('ignore')

In [2]:
# For ordinary-least-squares linear regression
def add_log_values(df):
    """Append base-10 log-transformed feature columns to ``df`` (modified in place).

    Each new column is ``log10(value + shift)`` of an existing column. The shift
    is 1 for every column except ``grade``, which uses 2.
    NOTE(review): the larger shift presumably exists because ``grade`` can be as
    low as -1 — confirm against the scraped data.

    Columns read:  star_ratings, ticks, avg_stars, length_, grade, on_to_do_lists
    Columns added: log_star_ratings, log_ticks, log_avg_stars, log_length,
                   log_grade, log_on_to_do_lists (the regression target)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the source columns listed above.

    Returns
    -------
    pandas.DataFrame
        The *same* object that was passed in, now carrying the extra columns.

    Raises
    ------
    KeyError
        If any source column is missing.
    ValueError
        If ``value + shift`` is not strictly positive for some row.
    """
    def _log10_shifted(shift):
        # math.log10 is used rather than math.log(x, 10): the two-argument form
        # evaluates ln(x)/ln(10) and is inexact even for exact powers of ten
        # (math.log(1000, 10) == 2.9999999999999996), whereas log10(1000) == 3.0.
        return lambda value: math.log10(value + shift)

    log10_plus_one = _log10_shifted(1)
    log10_plus_two = _log10_shifted(2)

    df['log_star_ratings'] = df['star_ratings'].apply(log10_plus_one)
    df['log_ticks'] = df['ticks'].apply(log10_plus_one)
    df['log_avg_stars'] = df['avg_stars'].apply(log10_plus_one)
    df['log_length'] = df['length_'].apply(log10_plus_one)
    df['log_grade'] = df['grade'].apply(log10_plus_two)
    df['log_on_to_do_lists'] = df['on_to_do_lists'].apply(log10_plus_one)  # Target
    return df

# For Poisson regression
def add_sqrt_values(df):
    """Append square-root feature columns to ``df`` and return that same frame.

    ``star_ratings``, ``ticks``, ``avg_stars`` and ``length_`` are rooted as-is;
    ``grade`` is rooted after adding 1. The new columns are named
    ``sqrt_star_ratings``, ``sqrt_ticks``, ``sqrt_avg_stars``, ``sqrt_length``
    and ``sqrt_grade``, created in that order. The input frame is modified in
    place.
    """
    def _root(value):
        return math.sqrt(value)

    unshifted = (
        ('star_ratings', 'sqrt_star_ratings'),
        ('ticks', 'sqrt_ticks'),
        ('avg_stars', 'sqrt_avg_stars'),
        ('length_', 'sqrt_length'),
    )
    for source_col, target_col in unshifted:
        df[target_col] = df[source_col].apply(_root)

    # grade is the only column shifted before taking the root
    df['sqrt_grade'] = df['grade'].apply(lambda grade: _root(grade + 1))
    return df

In [3]:
# Climbing areas to process; each has a scrape CSV named "<area>-scrape.csv".
AREA_NAMES = ("buttermilks", "druid_stones", "happy_boulders", "sad_boulders", "joshua_tree")


def _transform_area(area):
    """Load one area's scrape, add the log and sqrt columns, save and return it.

    Reads ``data/scraped-data/<area>-scrape.csv``, runs ``add_log_values`` then
    ``add_sqrt_values`` on it, and writes the result (without the index column)
    to ``data/transform-data/<area>-transform.csv``.
    """
    scrape_df = pd.read_csv(f"data/scraped-data/{area}-scrape.csv")  # Imports
    transform_df = add_sqrt_values(add_log_values(scrape_df))
    # index=False is the documented boolean form of the former index=None.
    transform_df.to_csv(f"data/transform-data/{area}-transform.csv", index=False)
    return transform_df


# Areas are processed in the same order as before: read -> transform -> write.
transformed_dfs = {area: _transform_area(area) for area in AREA_NAMES}

# Keep the original per-area variable names available to later cells. The two
# transform helpers modify and return the frame they are given, so the
# *_scrape_df, *_log_df and *_log_sqrt_df names have always pointed at one and
# the same DataFrame; the chained assignments make that aliasing explicit.
buttermilks_scrape_df = buttermilks_log_df = buttermilks_log_sqrt_df = transformed_dfs["buttermilks"]
druid_stones_scrape_df = druid_stones_log_df = druid_stones_log_sqrt_df = transformed_dfs["druid_stones"]
happy_boulders_scrape_df = happy_boulders_log_df = happy_boulders_log_sqrt_df = transformed_dfs["happy_boulders"]
sad_boulders_scrape_df = sad_boulders_log_df = sad_boulders_log_sqrt_df = transformed_dfs["sad_boulders"]
joshua_tree_scrape_df = joshua_tree_log_df = joshua_tree_log_sqrt_df = transformed_dfs["joshua_tree"]