# Import Libraries

In [68]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent

# Scrape Data

Create selenium driver to be used in scrape

In [69]:
# Absolute path to the chromedriver binary. This location is specific to one machine,
# so it has to be edited before the notebook can run anywhere else.
driver_path = '/Users/blantj/Documents/Practice/clothing_sales_time_series/chromedriver'
# Chrome options reused by every webdriver.Chrome(...) call in this notebook.
options = Options()
options.headless = True  # run Chrome without opening a visible window
options.add_argument("--window-size=1920,1200")  # fixed window size passed to Chrome
# NOTE(review): scrape() builds its own webdriver.Chrome from `options` and `driver_path`,
# so this module-level `driver` does not appear to be used by the visible cells and is
# never quit - confirm nothing else relies on it before removing.
driver = webdriver.Chrome(options=options, executable_path=driver_path)

Define lists to be used in scrape

In [70]:
# Month numbers 1 through 12; each one is passed to the scrape function in turn.
months = [*range(1, 13)]
# Accumulator for the scraped rows (one list per table row).
temp_data = list()

Define Scrape function

In [75]:
def scrape(month, df):
    """Scrape one month's warm/cold table rows from the NOAA page into ``df``.

    A headless Chrome session is started for the call and always shut down
    again, even if the page load or an element lookup raises.

    Parameters
    ----------
    month : int
        Month number; interpolated into the page URL and stored as the first
        item of every appended row.
    df : list
        Accumulator (a plain list, despite the name). One
        ``[month, td[1] text, td[2] text, td[3] text]`` list is appended per
        table row, in place.

    Returns
    -------
    list
        The same ``df`` object that was passed in.

    Raises
    ------
    selenium.common.exceptions.WebDriverException
        Propagated unchanged if the browser cannot start, the page cannot be
        loaded, or an expected table cell is missing.
    """
    driver = webdriver.Chrome(options=options, executable_path=driver_path)
    try:
        driver.get(f'https://www.ncdc.noaa.gov/temp-and-precip/uspa/warm-cold/{month}')
        # Table rows tr[98]..tr[125] (28 rows). NOTE(review): presumably the
        # years 1992 onward, judging by the notebook output - confirm against
        # the live page, since the absolute row offsets shift if the table grows.
        for num in range(98, 126):
            row_xpath = f'/html/body/div[1]/div[2]/div/div[2]/div/div[2]/table/tbody/tr[{num}]'
            # find_element('xpath', ...) is available in both Selenium 3 and 4;
            # the find_element_by_xpath helper was removed in Selenium 4.3.
            cells = [
                driver.find_element('xpath', f'{row_xpath}/td[{col}]').text
                for col in (1, 2, 3)
            ]
            df.append([month, *cells])
    finally:
        # Without this every call leaves a Chrome process running.
        driver.quit()
    return df

Run scrape function 

In [72]:
# Start from an empty list so that re-running this cell replaces the results
# instead of appending a second copy of every row to the existing list.
temp_data = []
for month in months:
    temp_data = scrape(month, temp_data)

# Format Scrape Results

Create df with scrape results

In [74]:
# Each scraped row is [month, year text, warm-days text, cold-days text].
df = pd.DataFrame(
    data=temp_data,
    columns=['Month', 'Year', 'Warm Days', 'Cold Days'],
)
df.head()

Unnamed: 0,Month,Year,Warm Days,Cold Days
0,1,1992,19.81%,0.00%
1,1,1993,3.74%,0.00%
2,1,1994,8.55%,13.84%
3,1,1995,5.27%,0.00%
4,1,1996,2.60%,0.00%


Define and apply a function to create a single feature representing unfavorable deviation from normal temperature (excessively warm in winter and excessively cold in summer)

In [83]:
def temp_deviation(row):
    """Select the unfavorable-temperature value for one row.

    Rows whose 'Month' is 3 through 8 yield their 'Cold Days' value; all
    other rows yield their 'Warm Days' value.
    """
    cold_is_unfavorable = row['Month'] in (3, 4, 5, 6, 7, 8)
    return row['Cold Days'] if cold_is_unfavorable else row['Warm Days']
        
# Row-wise pass: every row contributes exactly one value to the new column.
df['Unfavorable Days'] = df.apply(temp_deviation, axis='columns')
df.head()

Unnamed: 0,Month,Year,Warm Days,Cold Days,Unfavorable Days
0,1,1992,19.81%,0.00%,19.81%
1,1,1993,3.74%,0.00%,3.74%
2,1,1994,8.55%,13.84%,8.55%
3,1,1995,5.27%,0.00%,5.27%
4,1,1996,2.60%,0.00%,2.60%


Create formatted date stamp for each row

In [112]:
def create_date(row):
    """Build a 'YYYY-MM-01' date string from a row's 'Year' and 'Month'.

    Parameters
    ----------
    row : mapping
        Must provide 'Year' (str or int) and 'Month' (anything ``int()``
        accepts, e.g. 1 or '1').

    Returns
    -------
    str
        The first day of that month, with the month zero-padded to two digits.
    """
    # The format spec does the zero-padding, and formatting Year instead of
    # concatenating it means an integer Year no longer raises TypeError.
    return f"{row['Year']}-{int(row['Month']):02d}-01"
# Build the 'YYYY-MM-DD' strings row by row and parse them into datetimes.
df['Date'] = pd.to_datetime(
    df.apply(create_date, axis='columns'),
    format='%Y-%m-%d',
)
df.head()

Unnamed: 0,Month,Year,Warm Days,Cold Days,Unfavorable Days,Date
0,1,1992,19.81%,0.00%,19.81%,1992-01-01
1,1,1993,3.74%,0.00%,3.74%,1993-01-01
2,1,1994,8.55%,13.84%,8.55%,1994-01-01
3,1,1995,5.27%,0.00%,5.27%,1995-01-01
4,1,1996,2.60%,0.00%,2.60%,1996-01-01


Convert Unfavorable Days string to float

In [114]:
# Drop the trailing '%' and convert to float. Going through str first keeps
# this cell re-runnable: values that are already floats pass through unchanged
# instead of raising when sliced, and only a literal '%' is ever removed.
df['Unfavorable Days'] = (
    df['Unfavorable Days']
    .astype(str)
    .str.rstrip('%')
    .astype(float)
)
df.head()

Unnamed: 0,Month,Year,Warm Days,Cold Days,Unfavorable Days,Date
0,1,1992,19.81%,0.00%,19.81,1992-01-01
1,1,1993,3.74%,0.00%,3.74,1993-01-01
2,1,1994,8.55%,13.84%,8.55,1994-01-01
3,1,1995,5.27%,0.00%,5.27,1995-01-01
4,1,1996,2.60%,0.00%,2.6,1996-01-01


Save df with scrape results to csv file

In [116]:
# Write the frame without the index column; the path is relative to the working directory.
df.to_csv(path_or_buf='Data/weather.csv', index=False)