<a href="https://colab.research.google.com/github/jimbojamborine/Projects/blob/main/UK_House_Prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [120]:
import pandas as pd
from datetime import timedelta
import plotly.express as px

#Link to Gov HPI data
url = 'http://publicdata.landregistry.gov.uk/market-trend-data/house-price-index-data/UK-HPI-full-file-2022-03.csv'

df = pd.read_csv(url)

#Turn to datetime
df['datetime'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')

#get date values and sort for querying
dates = df['datetime'].unique()
date_df = pd.DataFrame(dates,columns=['dates'])

date_df = date_df.sort_values('dates',ascending=False)

d_1 = date_df['dates'].iloc[36]
d_2 = date_df['dates'].iloc[0]

#set date values
pre_df = df[df['datetime'] == d_1]
now_df = df[df['datetime']== d_2]

#set dataframes to compare
pre_df = pre_df[
                ['datetime',
                 'RegionName',
                 'AreaCode',
                 'Index',
                 'AveragePrice'
                 ]
                ]
now_df = now_df[
                ['RegionName',
                 'Index',
                 'AveragePrice'
                 ]
                ]

#join to master df
df_j = pre_df.merge(now_df, on='RegionName',how='left')

#calculate change between two dfs
df_j['HPI Change'] = (df_j['Index_y'] - df_j['Index_x']) / df_j['Index_x'] * 100
df_j['Avg Price Change'] = (df_j['AveragePrice_y'] - df_j['AveragePrice_x'])

change_df = df_j[
                 ['RegionName', 
                  'AreaCode', 
                  'HPI Change',
                  'Avg Price Change',
                  'AveragePrice_y'
                  ]
                 ]

change_df['country'] = change_df['AreaCode'].str[0]

#remove aggregate country
change_df = change_df.loc[change_df["country"] !='K']

#rename countries
change_df['country'].replace('S','Scotland',inplace=True)
change_df['country'].replace('W','Wales',inplace=True)
change_df['country'].replace('N','Northern Ireland',inplace=True)
change_df['country'].replace('E','England',inplace=True)

#create list of countries for loop
countries = change_df['country'].unique()

for country in countries:
  temp = change_df[change_df['country']==country]

  #visualistion
  fig = px.scatter(temp, 
                   x='Avg Price Change', 
                   y='HPI Change', 
                   title= country + ' Average House Price Change vs HPI Change By Region - ' + d_2.strftime('%Y/%m') + " vs " + d_1.strftime('%Y/%m'), 
                   color='RegionName', 
                   size='AveragePrice_y', 
                   hover_data=['RegionName','HPI Change','Avg Price Change']
                   )
  fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

