#### Imports

In [78]:
# Import necessary packages.
from PIL import Image
import pandas as pd
import os
import urllib.request
import math

#### Create DataFrame for Outputs

In [79]:
# Column layout of the output table: the date, followed by one indicator
# column per possible likelihood rating.
header_names = [
    "date",
    "unlikely",
    "possible",
    "likely",
    "very_likely",
    "certain",
]

# Start from an empty frame that carries nothing but the column headers.
df = pd.DataFrame(columns=header_names)

In [80]:
# Two-digit suffixes of the seasons to process, from the first through the
# last season inclusive.
first_season, last_season = 17, 19
years = tuple(str(season) for season in range(first_season, last_season + 1))

#### One-hot encode Likelihood

In [81]:
# Input columns that may hold the scraped rating string, in the order they are
# consulted; the first one that is not missing wins.
# NOTE(review): no "certain" input column is consulted even though "certain"
# is a possible output rating -- confirm the input CSVs have no such column.
RATING_SOURCE_COLUMNS = ("very_likely", "likely", "possible", "unlikely")

# Last character of the scraped string -> position of the 1 within the one-hot
# part of an output row (unlikely, possible, likely, very_likely, certain).
RATING_TO_POSITION = {"1": 0, "2": 1, "3": 2, "4": 3, "5": 4}

for year in years:
    # One input file per season: likelihoods_<yy>.csv.
    likelihoods = pd.read_csv("likelihoods_" + year + ".csv")

    for _, row in likelihoods.iterrows():
        # Keep only what follows the first 40 characters of the scraped value.
        # NOTE(review): presumably this strips a fixed-length prefix -- confirm.
        date = row["date"][40:]

        # Take the first rating column that was actually scraped; rows with
        # no rating at all are left out of the output.
        likelihood = next(
            (row[col] for col in RATING_SOURCE_COLUMNS if not pd.isna(row[col])),
            None,
        )
        if likelihood is None:
            continue

        # The rating is the last character of the scraped string. Anything
        # other than "1".."5" is skipped: previously such a row re-appended
        # the preceding row's entry (or raised NameError on the first row).
        position = RATING_TO_POSITION.get(likelihood[-1:])
        if position is None:
            continue

        # Output row: the date followed by the one-hot encoded rating.
        entry = [date] + [
            int(slot == position) for slot in range(len(RATING_TO_POSITION))
        ]
        df.loc[len(df)] = entry

#### Sort DataFrame by date

In [82]:
# Parse the date column into datetimes so the sort below is chronological
# rather than lexicographic. `infer_datetime_format` is intentionally not
# passed: it is deprecated since pandas 2.0, where format inference is the
# default behaviour, and passing it only triggers a warning.
df["date"] = pd.to_datetime(df["date"])

# Order the rows from the earliest to the latest date, in place.
df.sort_values(by="date", ascending=True, inplace=True)

# Write the sorted table to CSV without the index column.
df.to_csv("likelihoods.csv", index=False)