In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from pandas.plotting import scatter_matrix
from pathlib import Path
from scipy.stats import linregress
import plotly.express as px

In [None]:
def linear_regression_plot(x, y):
    """Scatter-plot ``y`` against ``x`` and overlay the least-squares line.

    Fits ``y = slope * x + intercept`` with ``scipy.stats.linregress``, prints
    the correlation coefficient, and draws the points, the fitted line, its
    equation and a legend through the pyplot interface. The figure is neither
    shown nor closed here, so the caller can still add a title and axis labels.

    Parameters
    ----------
    x, y : array-like of numbers
        Equal-length 1-D sequences. Lists, Series and object-dtype arrays are
        accepted; both are converted to float, so fractional values are fitted
        as given instead of being truncated to integers.

    Returns
    -------
    The ``linregress`` result (``slope``, ``intercept``, ``rvalue``,
    ``pvalue``, ``stderr``), so the caller can reuse the fit.
    """
    # Coerce once up front so the fit, the line and the scatter all use the same numbers.
    x_vals = np.asarray(x, dtype=float)
    y_vals = np.asarray(y, dtype=float)

    # Perform linear regression
    fit = linregress(x_vals, y_vals)
    regression_line = fit.slope * x_vals + fit.intercept

    print(f'The r-value is: {round(fit.rvalue,2)}')

    # Plotting
    plt.scatter(x_vals, y_vals, label='Data Points')
    plt.plot(x_vals, regression_line, color='red', label='Linear Regression')
    # Anchor the equation near the lower-left data point; the +5 nudge is in x data units.
    plt.annotate(f'y = {round(fit.slope,2)}x + {round(fit.intercept,2)}',
                 xy=(x_vals.min() + 5, y_vals.min()),
                 xycoords='data', color='red', size=15)
    # The labels passed above are only rendered once a legend is drawn.
    plt.legend()
    return fit

In [None]:
# Path to the merged CSV; it is relative, so it resolves against the
# notebook's current working directory.
merge_data = Path("../Resources/merge_file.csv")
# Load the file into the DataFrame that the following cells read from.
merge_df = pd.read_csv(merge_data)
# Show the first rows as this cell's output.
merge_df.head()

In [None]:
# Fit one linear regression per row of merge_df (x = columns 1-11, y = columns
# 12 onward) and collect the results; both lists are aligned with the rows.
# Presumably x holds homelessness figures and y rental prices, going by the
# variable names - confirm against the CSV header.
state_correlation = []    # correlation coefficient (r) of each row's fit
p_value_correlation = []  # p-value of each row's fit

# Iterate over row *positions*: the rows are read with .iloc, which is
# positional, so looping over index labels is only correct for a default
# RangeIndex.
for k in range(len(merge_df)):
    # NOTE(review): 1:12 spans 11 columns but 12:24 spans 12, and linregress
    # needs equal lengths - this presumably relies on the frame having 23
    # columns so the second slice is clipped at the last one. Confirm.
    homelesess = merge_df.iloc[k,1:12].values
    rental_price=merge_df.iloc[k,12:24].values
    pe_slope, pe_int, pe_r, pe_p, pe_std_err = linregress(homelesess.astype(int), rental_price.astype(int))
    state_correlation.append(pe_r)
    p_value_correlation.append(pe_p)

# Display the collected coefficients as the cell output.
state_correlation

In [None]:
# US choropleth coloured by each state's correlation coefficient.
# state_correlation is matched to merge_df["State"] by position.
fig = px.choropleth(
    locations=merge_df["State"],
    locationmode="USA-states",
    color=state_correlation,
    scope="usa",
    color_continuous_scale="Viridis",
)
# Set the margins and the colour-bar caption in a single layout update.
fig.update_layout(
    margin={"l": 60, "r": 60, "t": 50, "b": 50},
    coloraxis_colorbar={"title": 'Correlation Coefficients'},
)
fig.show()

In [None]:
# US choropleth coloured by the p-value of each state's regression.
# Colorscale stops are positions within the colour range, not data values, and
# that range auto-scales to the data by default. Pinning it to [0, 1] makes the
# 0.1 stop correspond to p = 0.1 regardless of which p-values occur.
fig = px.choropleth(locations=merge_df["State"], locationmode="USA-states", 
                    color=p_value_correlation, scope="usa", 
                    color_continuous_scale=[(0, "gray"), (0.1, "yellow"), (1, "purple")],
                    range_color=[0, 1])
fig.update_layout(margin=dict(l=60, r=60, t=50, b=50))
fig.layout.coloraxis.colorbar.title = 'p-value'
fig.show()

In [None]:
# Row *position* of the first row whose State is 'CA'. The row is read with
# .iloc below, which is positional, so an index label would only be correct
# for a default RangeIndex. Raises IndexError if no row matches.
ca_idx = np.flatnonzero((merge_df["State"] == 'CA').to_numpy())[0]
# Same column slices as the per-state regression loop (x = 1:12, y = 12:24).
homelesess_CA = merge_df.iloc[ca_idx,1:12].values
rental_price_CA=merge_df.iloc[ca_idx,12:24].values
# Draw the scatter and fitted line; title and labels are added afterwards
# because the helper leaves the figure open.
linear_regression_plot(homelesess_CA.astype(int), rental_price_CA.astype(int))
plt.title("California")
plt.xlabel("Homelessness")
plt.ylabel("Rental price")
plt.show()