# Project 7 with Machine Learning
## by: Aaron Pelton
## https://github.com/hrawp/datafun-07-ml


In [40]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression



: 

## Part 1 - Chart a Straight Line

In [41]:
# initial setup
def c(f):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius."""
    return 5 / 9 * (f - 32)


# (Fahrenheit, Celsius) pairs for 0, 10, ..., 100 degrees Fahrenheit.
temps = [(f, c(f)) for f in range(0, 101, 10)]

In [None]:
# creating the line
# Tabulate the (Fahrenheit, Celsius) pairs, then draw them as a dotted line.
temps_df = pd.DataFrame(data=temps, columns=['Fahrenheit', 'Celsius'])
axes = temps_df.plot.line(x='Fahrenheit', y='Celsius', style='.-')
# Keeping the return value in a name stops its repr from showing as cell output.
y_label = axes.set_ylabel('Celsius')

## Part 2 - Predict Avg High Temp in NYC in January

### Section 1 - Data Acquisition

In [43]:
# Load data
# The CSV path is relative, so it is resolved against the kernel's working directory.
nyc_df = pd.read_csv(filepath_or_buffer='ave_hi_nyc_jan_1895-2018.csv')

### Section 2 - Data Inspection

In [None]:
# Preview the first five rows of the raw data.
nyc_df.head(n=5)


In [None]:
# Preview the last five rows of the raw data.
nyc_df.tail(n=5)

### Section 3 - Data Cleaning

In [None]:
# Give the three columns short names that work with attribute access.
nyc_df.columns = ['Date', 'Temperature', 'Anomaly']
# Make the date only show the year.
# Floor division by 100 drops the last two digits (presumably a YYYYMM month
# suffix -- confirm against the CSV). Only values that still have more than
# four digits are trimmed, so re-running this cell leaves already-cleaned
# years untouched instead of truncating them again (e.g. 1895 -> 18).
nyc_df.Date = nyc_df.Date.where(nyc_df.Date < 10_000,
                                nyc_df.Date.floordiv(100))
# show the changes
nyc_df.head()

### Section 4 - Descriptive Statistics

In [None]:
# Set precision to 2
# (display-only setting: the stored values are not rounded)
pd.options.display.precision = 2
# Show the description of the data
nyc_df['Temperature'].describe()

### Section 5 - Build the Model

In [None]:
# Create the y = mx + b line
# Least-squares fit of Temperature (y) against Date (x) with SciPy.
linear_regression = stats.linregress(
    x=nyc_df['Date'],
    y=nyc_df['Temperature'],
)
# Slope (m) of the fitted line.
linear_regression.slope


In [None]:
# Intercept (b) of the line fitted by stats.linregress.
linear_regression.intercept

### Section 6 - Predict

In [None]:
# What is the predicted temperature in 2024
# Evaluate y = b + m * x at x = 2024 using the fitted intercept and slope.
linear_regression.intercept + linear_regression.slope * 2024

### Section 7 - Visualizations

In [None]:
# Scatter plot of Temperature against Date with a fitted regression line.
sns.set_style('whitegrid')
axes = sns.regplot(data=nyc_df, x='Date', y='Temperature')
# Fix the y-axis range to 10-70.
axes.set_ylim(bottom=10, top=70)

## Part 3 - Prediction

### Section 1 - Build the Model

In [52]:
# Split the data into training and test sets.
# Date is selected as a one-column frame so the features form a 2-D
# (samples, 1) array; random_state pins the shuffle so the split is the same
# on every run.
X_train, X_test, y_train, y_test = train_test_split(
    nyc_df[['Date']].values,
    nyc_df['Temperature'].values,
    random_state=11,
)


In [None]:
# Check the shape of each data set.
# Training features; the second dimension is 1 because Date was reshaped
# with reshape(-1, 1) before the split.
X_train.shape

In [None]:
# Test features returned by the same train_test_split call.
X_test.shape

In [None]:
# Train the model using Linear Regression Fit
# NOTE: this rebinds `linear_regression`, which previously held the SciPy
# linregress result; from here on it is the scikit-learn estimator.
linear_regression = LinearRegression()
# fit() returns the fitted estimator, so it is what this cell displays.
linear_regression.fit(X=X_train, y=y_train)

In [None]:
# slope
# coef_ is the scikit-learn estimator's fitted coefficient array; the model
# was trained on a single feature column (Date).
linear_regression.coef_

In [None]:
# intercept
# intercept_ is the fitted constant term (b) of the scikit-learn model.
linear_regression.intercept_

### Section 2 - Test the Model

In [None]:
# Predict on the held-out features and keep the true values alongside.
predicted = linear_regression.predict(X_test)
expected = y_test
# Print every fifth (predicted, expected) pair as a spot check.
for p, e in list(zip(predicted, expected))[::5]:
    print(f'predicted: {p:.2f}, expected: {e:.2f}')

### Section 3 - Predict

In [None]:
def predict(x):
    """Return the fitted line evaluated at ``x``.

    Computes ``coef_ * x + intercept_`` from the fitted ``linear_regression``
    estimator. ``coef_`` is an array, so the result is an array as well.
    """
    return linear_regression.coef_ * x + linear_regression.intercept_


predict(2024)