In [None]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress
from pathlib import Path

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Locate the cleaned CSV exports; both live in the same output directory
OUTPUT_DIR = Path("output")
clean_2020_data = OUTPUT_DIR / "clean_2020.csv"
clean_2023_data = OUTPUT_DIR / "clean_2023.csv"

In [None]:
# Load each cleaned CSV into its own dataframe (2020 first, then 2023)
clean_2020_df, clean_2023_df = map(pd.read_csv, (clean_2020_data, clean_2023_data))

In [None]:
# Preview the first five rows of the cleaned 2020 dataframe
clean_2020_df.head(n=5)

In [None]:
# Preview the first five rows of the cleaned 2023 dataframe
clean_2023_df.head(n=5)

## Scatterplots & Linear Regressions 

In [None]:
# Define a function to create Linear Regression plots
def lin_reg_plot(x_values, y_values, x_label, y_label):
    """Scatter-plot ``y_values`` against ``x_values`` with a fitted regression line.

    Fits a simple least-squares line with ``scipy.stats.linregress``, prints the
    r-value, and draws the scatter, title, axis labels, and fitted line on the
    current matplotlib figure. The fitted equation is shown as the legend entry
    for the regression line. The figure is neither saved nor shown here; the
    caller does that.

    Parameters
    ----------
    x_values : array-like
        Independent variable. Must support ``x_values * float`` (the notebook
        passes pandas Series).
    y_values : array-like
        Dependent variable, same length as ``x_values``.
    x_label : str
        Label for the x-axis; also used in the title.
    y_label : str
        Label for the y-axis; also used in the title.

    Returns
    -------
    None
    """
    fit = linregress(x_values, y_values)
    regress_values = x_values * fit.slope + fit.intercept

    # Render a negative intercept as "- b" instead of "+ -b".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {fit.slope:.2f}x {sign} {abs(fit.intercept):.2f}"

    print(f"The r-value is: {fit.rvalue}")
    plt.scatter(x_values, y_values)
    plt.title(f'{y_label} vs. {x_label}')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    # The equation was previously computed but never displayed; attach it to
    # the regression line so it appears in the legend.
    plt.plot(x_values, regress_values, "r-", label=line_eq)
    # Fixed location: loc="best" can be slow when the scatter has many points.
    plt.legend(loc="upper right")
    

### Scatterplots for 2023

In [None]:
# Price vs. number of listings per host, all cities, 2023
x_col, y_col = "Number of Listings Per Host", "Price"
lin_reg_plot(clean_2023_df[x_col], clean_2023_df[y_col], x_label=x_col, y_label=y_col)
plt.savefig("images/2023scatterplot1.png")
plt.show()


In [None]:
# Price vs. minimum night stays, all cities, 2023
x_col, y_col = "Minimum Nights", "Price"
lin_reg_plot(clean_2023_df[x_col], clean_2023_df[y_col], x_label=x_col, y_label=y_col)
plt.savefig("images/2023scatterplot2.png")
plt.show()

In [None]:
# Compare Price vs. Availability 2023
lin_reg_plot(clean_2023_df["Availability"], clean_2023_df["Price"],
             "Availability", "Price")
# Explicit .png extension, consistent with the other scatterplot cells, so the
# output format does not depend on matplotlib's default savefig format.
plt.savefig("images/2023scatterplot3.png")
plt.show()


In [None]:
# Compare Price vs. # of Reviews 2023
lin_reg_plot(clean_2023_df["Number of Reviews"], clean_2023_df["Price"],
             "Number of Reviews", "Price")
# Explicit .png extension, consistent with the other scatterplot cells, so the
# output format does not depend on matplotlib's default savefig format.
plt.savefig("images/2023scatterplot4.png")
plt.show()

#### Scatterplots and Linear Regressions for 2023 Summary
We plotted Price against each of the other numerical variables in the data set (Number of Listings Per Host, Minimum Nights, Availability, and Number of Reviews) to determine whether there was a linear relationship between the two variables. All plots had r-values close to 0 (ranging from -0.029 to 0.018), indicating little to no linear correlation between price and each of the other numerical variables.

### Scatterplots for 2020

In [None]:
# Price vs. number of listings per host, all cities, 2020
x_col, y_col = "Number of Listings Per Host", "Price"
lin_reg_plot(clean_2020_df[x_col], clean_2020_df[y_col], x_label=x_col, y_label=y_col)
plt.savefig("images/2020scatterplot1.png")
plt.show()

In [None]:
# Price vs. minimum night stays, all cities, 2020
x_col, y_col = "Minimum Nights", "Price"
lin_reg_plot(clean_2020_df[x_col], clean_2020_df[y_col], x_label=x_col, y_label=y_col)
plt.savefig("images/2020scatterplot2.png")
plt.show()

In [None]:
# Price vs. availability, 2020
x_col, y_col = "Availability", "Price"
lin_reg_plot(clean_2020_df[x_col], clean_2020_df[y_col], x_label=x_col, y_label=y_col)
plt.savefig("images/2020scatterplot3.png")
plt.show()

In [None]:
# Price vs. number of reviews, 2020
x_col, y_col = "Number of Reviews", "Price"
lin_reg_plot(clean_2020_df[x_col], clean_2020_df[y_col], x_label=x_col, y_label=y_col)
plt.savefig("images/2020scatterplot4.png")
plt.show()

#### Scatterplots and Linear Regressions for 2020 Summary
We plotted Price against each of the other numerical variables in the data set (Number of Listings Per Host, Minimum Nights, Availability, and Number of Reviews) to determine whether there was a linear relationship between the two variables. All plots had r-values close to 0 (ranging from -0.023 to 0.062), indicating little to no linear correlation between price and each of the other numerical variables.