In [1]:
## library imports

## db connection params
from config import get_engine

## IO
import pandas as pd

## linear algebra
import numpy as np

## regex 
import re 

## timing
import time

from processing import strip, replace_empty_string,map_season,replace_finished_game_vals

In [2]:
## db engines: `engine` is the connection the raw Scores table is read from,
## `processing_engine` is the connection the processed frame is written to
engine = get_engine("landing")
processing_engine = get_engine("processing")

In [3]:
## load every row of the Scores table (curling linescores) from the Raspberry Pi MySQL DB

scores_query = "SELECT * FROM Scores;"
scores = pd.read_sql(scores_query, con=engine)

In [4]:
## names of the per-end score columns: "End1" ... "End12"
ends = ["End" + str(end_num) for end_num in range(1, 13)]

In [5]:
## strip all end scores: apply the project's `strip` helper to every cell of the End columns
## (presumably trims surrounding whitespace -- helper lives in `processing`, confirm there)
## NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
## switch once the environment's pandas is upgraded

scores[ends] = scores.loc[:, ends].applymap(strip)

In [6]:
## replace empty strings with null values: `replace_empty_string` is applied to every
## cell of the End columns (exact null marker is decided by the helper in `processing`)

scores[ends] = scores.loc[:, ends].applymap(replace_empty_string)

In [7]:
## replace 'X': `replace_finished_game_vals` is applied to every cell of the End columns
## (presumably 'X' marks ends not played because the game was already over -- confirm in `processing`)

scores[ends] = scores.loc[:, ends].applymap(replace_finished_game_vals)

In [8]:
## add year

## the greedy ".*" makes the capture the LAST run of four digits in EventDates
pat = r".*(\d{4})"
## expand=False always returns a Series (the default for `expand` differs between pandas
## versions), which is also how the later extraction cells call str.extract
years = scores['EventDates'].str.extract(pat=pat, expand=False)
## every row must carry a year; a non-matching row comes back as NaN
assert years.isnull().sum() == 0, "EventDates rows without a 4-digit year"
scores['Year'] = years.astype('int')

In [9]:
## add start month

## the leading run of word characters in EventDates is taken as the start month.
## `\w+` rather than `\w*`: `\w*` also matches the empty string, so every row "matched"
## and the assertion below could never fire; with `\w+` a row that does not start with
## a word character yields NaN and is reported.
pat = r"^(\w+)"
## expand=False always returns a Series, independent of the pandas default
start_month = scores['EventDates'].str.extract(pat=pat, expand=False)
assert start_month.isnull().sum() == 0, "EventDates rows without a leading month token"
scores['StartMonth'] = start_month

In [10]:
## add end month

## token that follows the dash, when that token is not a day number.
## `\s*` accepts zero or several whitespace characters after the dash, matching the
## leniency of the end-day pattern; the capture has to start with a character that is
## neither a digit nor whitespace, so extra spaces are never captured as the "month".
pat = r"^.*-\s*([^\d\s]+)"
scores['EndMonth'] = (
    scores['EventDates']
    .str.extract(pat=pat, expand=False)
    ## no month after the dash (or no dash at all): the event ends in its start month
    .combine_first(scores['StartMonth'])
)

In [11]:
## start day

## one to six non-digit characters (the month token), one whitespace, then the day number.
## `\d+` rather than `\d*`: with `\d*` a row lacking the digits still matched with an empty
## capture, slipped past the null assertion and only failed later inside astype(int) with
## an unhelpful ValueError; now such a row is NaN and the assertion reports it.
pat = r"^\D{1,6}\s(\d+)"

start_day = scores['EventDates'].str.extract(pat=pat, expand=False)
assert start_day.isnull().sum() == 0, "EventDates rows without a parsable start day"

scores['EventDayStart'] = start_day.astype(int)

In [12]:
## end day

## day number after the dash, optionally preceded by a month name; rows where the
## pattern does not match (NaN) fall back to the start day
## NOTE(review): `(\d*)` can capture an empty string, which is not null, so
## combine_first keeps it and astype(int) then raises
pat = r"^\w*\s*\d*\s*-\s*(?:[a-zA-Z]*)?\s*(\d*)"
end_day = scores['EventDates'].str.extract(pat=pat, expand=False)
scores['EventDayEnd'] = end_day.combine_first(scores['EventDayStart']).astype(int)

In [13]:
## get draw

## keep only what follows the last whitespace character of the Draw value
## NOTE(review): the column is overwritten in place, so re-running this cell works on the
## already-extracted values; a value without any whitespace does not match the pattern
## and would trip the assertion
pat = r"^.*\s(.*)"
draw = scores['Draw'].str.extract(pat=pat, expand=False)
assert draw.notnull().all()

scores['Draw'] = draw

In [14]:
## get TeamID

## digits that follow the last "teamid=" in TeamLink.
## `\d+` rather than `\d*`: `\d*` accepted "teamid=" with nothing after it and produced an
## empty-string id that the null assertion cannot detect; now such a link yields NaN.
pat = r"^.*teamid=(\d+)"
team_ids = scores['TeamLink'].str.extract(pat=pat, expand=False)
assert team_ids.isnull().sum() == 0, "TeamLink rows without a numeric teamid"

## the id is stored as extracted, i.e. as a string (no integer cast)
scores['TeamID'] = team_ids

In [15]:
## check datatypes: display the dtype of every column after the transformations above
## (values produced by str.extract are strings, i.e. `object`, unless cast with astype)

scores.dtypes

GameID            int64
EventID           int64
EventDates       object
EventName        object
Draw             object
End1             object
End2             object
End3             object
End4             object
End5             object
End6             object
End7             object
End8             object
End9             object
End10            object
End11            object
End12            object
Hammer            int64
FinalScore        int64
Team             object
URL              object
TeamLink         object
DrawNum           int64
Year              int32
StartMonth       object
EndMonth         object
EventDayStart     int32
EventDayEnd       int32
TeamID           object
dtype: object

In [16]:
## drop the raw EventDates column; year, months and days were extracted from it above

drop_cols = ['EventDates']
scores = scores.drop(columns=drop_cols)

In [17]:
## sql: write the processed frame to the "Scores" table through processing_engine,
## without the DataFrame index
## NOTE(review): if_exists='append' inserts into an existing table, so re-running this
## cell adds the same rows again

if_exists = 'append'
index = False
scores.to_sql(
    "Scores",
    con=processing_engine,
    if_exists=if_exists,
    index=index,
)