In [13]:
import pandas as pd
import numpy as np
from scipy.stats import iqr

from ppc_db import sql_extractdata

import os

# Connection settings that are forwarded as keyword arguments to
# sql_extractdata(). Every value can be overridden through an environment
# variable so that real credentials never need to be saved in the notebook;
# the fallbacks are the original local-development values.
db_args = {
    'user': os.environ.get('PPC_DB_USER', 'root'),
    'password': os.environ.get('PPC_DB_PASSWORD', 'password'),
    'host': os.environ.get('PPC_DB_HOST', 'localhost'),
    # Environment variables are strings, so the port is normalised to int.
    'port': int(os.environ.get('PPC_DB_PORT', '3306')),
    'database': os.environ.get('PPC_DB_NAME', 'CSV_DB 7'),
}

In [14]:
# Columns pulled from the price-paid table; the same names label the DataFrame.
cols = ['7_county', 'price_gbp']

# The column list is joined outside the f-string: reusing the enclosing quote
# character inside an f-string expression is a SyntaxError before Python 3.12.
select_list = ', '.join(cols)
query = f"SELECT {select_list} FROM Sep24pricepaidcomplete WHERE MONTH(date) < 7 and YEAR(date)=2023"
result = pd.DataFrame(sql_extractdata(query, **db_args), columns=cols)

# Attach the columns of county_to_region.csv to every sale, matched on
# '7_county'. DataFrame.join is a left join by default, so a county missing
# from the lookup keeps its row with NaN in the joined columns.
# NOTE(review): later cells read a 'region' column, presumably supplied by
# this CSV - confirm against the file.
county_lookup = pd.read_csv('county_to_region.csv', index_col='7_county', dtype=str)
df = result.join(county_lookup, on='7_county')

In [15]:
import plotly.express as px
# Overlaid histograms of sale price, one colour per region, with the x axis
# limited to the 0 - 1,000,000 range.
fig = px.histogram(
    df,
    x="price_gbp",
    color='region',
    nbins=3000,
)
fig.update_layout(
    barmode='overlay',
    xaxis_range=[0, 1e6],
)
fig.show()

In [16]:
import plotly.graph_objects as go

# Group sale prices by region and compute each aggregate exactly once; the
# results supply both the category axis and the bar heights below.
df_by_region = df.groupby('region')['price_gbp']
median_by_region = df_by_region.median()
iqr_by_region = df_by_region.agg(iqr)

# Grouped bars per region: median price next to the inter-quartile range.
# (The earlier throw-away `px.bar` figure was overwritten immediately and has
# been removed.)
fig = go.Figure(
    data=[
        go.Bar(
            x=median_by_region.index,
            y=median_by_region.values,
            name="Median",
            marker=dict(color="paleturquoise"),
        ),
        go.Bar(
            x=iqr_by_region.index,
            y=iqr_by_region.values,
            name="IQR",
            marker=dict(color="crimson"),
        ),
    ]
)

fig.update_layout(
    legend=dict(orientation="v"),
    # NOTE(review): both traces are drawn against this axis, so the IQR bars
    # also sit under the "Median, GBP" title - consider a broader label.
    yaxis=dict(
        title=dict(text="Median, GBP"),
        side="left",
    ),
    # NOTE(review): no trace in this figure is assigned to y2, so this "Skew"
    # axis configuration looks like a leftover - confirm before removing.
    yaxis2=dict(
        title=dict(text="Skew"),
        side="right",
        overlaying="y",
        tickmode="sync",
    ),
    barmode='group'
)

fig.show()

In [17]:
# Transaction counts per (year, county), restricted to months 1-6 of each year.
cols = ['YEAR(date)', '7_county', 'COUNT(`tx_id`)']

# Joined outside the f-string: nesting the same quote character inside an
# f-string expression is a SyntaxError before Python 3.12.
select_list = ', '.join(cols)
query = f"SELECT {select_list} FROM Sep24pricepaidcomplete WHERE MONTH(date) < 7 GROUP BY YEAR(date), 7_county"
result = pd.DataFrame(sql_extractdata(query, **db_args), columns=cols)

# Left-join the county -> region lookup onto the query result.
county_lookup = pd.read_csv('county_to_region.csv', index_col='7_county', dtype=str)
df = result.join(county_lookup, on='7_county')

# Sum only the count column. Summing the whole frame would also "sum" the
# string county column (string concatenation on pandas >= 2.0; on older
# versions the column is dropped by the sum and the follow-up .drop() fails).
# Result: one row per year, one column per region.
df_midyr = (
    df.groupby(['YEAR(date)', 'region'])['COUNT(`tx_id`)']
    .sum()
    .unstack()
)
# Keep years after 2008 only.
df_midyr = df_midyr[df_midyr.index > 2008]

In [18]:
# Transaction counts per (year, county) across all months of each year.
cols = ['YEAR(date)', '7_county', 'COUNT(`tx_id`)']

# Joined outside the f-string: nesting the same quote character inside an
# f-string expression is a SyntaxError before Python 3.12.
select_list = ', '.join(cols)
query = f"SELECT {select_list} FROM Sep24pricepaidcomplete GROUP BY YEAR(date), 7_county"
result = pd.DataFrame(sql_extractdata(query, **db_args), columns=cols)

# Left-join the county -> region lookup onto the query result.
county_lookup = pd.read_csv('county_to_region.csv', index_col='7_county', dtype=str)
df = result.join(county_lookup, on='7_county')

# Sum only the count column. Summing the whole frame would also "sum" the
# string county column (string concatenation on pandas >= 2.0; on older
# versions the column is dropped by the sum and the follow-up .drop() fails).
# Result: one row per year, one column per region.
df_fullyear = (
    df.groupby(['YEAR(date)', 'region'])['COUNT(`tx_id`)']
    .sum()
    .unstack()
)
# Keep years after 2008 only.
df_fullyear = df_fullyear[df_fullyear.index > 2008]

In [19]:
# Keep only the years before 2024 in the full-year table.
# NOTE(review): presumably because 2024 is an incomplete year in this
# extract - confirm.
df_fullyear = df_fullyear.loc[df_fullyear.index < 2024]

In [20]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# One panel per region, laid out two panels per row. The grid is derived from
# the number of regions rather than a fixed 5x2 lookup table, so more than ten
# regions no longer overruns the layout (ten regions still yields 5 rows).
regions = list(df_midyr.columns)
n_cols = 2
n_rows = max(1, -(-len(regions) // n_cols))  # ceiling division

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    # Pass plain strings rather than a pandas Index.
    subplot_titles=[str(region) for region in regions],
)

for position, region in enumerate(regions):
    # Fill the grid left-to-right, top-to-bottom (plotly rows/cols are 1-based).
    row_offset, col_offset = divmod(position, n_cols)
    panel = dict(row=row_offset + 1, col=col_offset + 1)
    # First trace: first-half-of-year counts; second trace: full-year counts.
    fig.add_trace(go.Scatter(x=df_midyr[region].index, y=df_midyr[region].values), **panel)
    fig.add_trace(go.Scatter(x=df_fullyear[region].index, y=df_fullyear[region].values), **panel)

# NOTE(review): the title says "July YTD", while the mid-year query filters on
# MONTH(date) < 7 (January-June) - confirm the intended wording.
fig.update_layout(height=1000, width=800, showlegend=False,
                  title_text="Number of Transactions by Year, July YTD / Full Year")

fig.show()

In [21]:
import datetime

# Transaction counts per (month, year, county) for months 1-6.
cols = ['MONTH(date)', 'YEAR(date)', '7_county', 'COUNT(`tx_id`)']

# Joined outside the f-string: nesting the same quote character inside an
# f-string expression is a SyntaxError before Python 3.12.
select_list = ', '.join(cols)
query = f"SELECT {select_list} FROM Sep24pricepaidcomplete WHERE MONTH(date) < 7 GROUP BY MONTH(date),YEAR(date), 7_county"
result = pd.DataFrame(sql_extractdata(query, **db_args), columns=cols)

# Left-join the county -> region lookup onto the query result.
county_lookup = pd.read_csv('county_to_region.csv', index_col='7_county', dtype=str)
df = result.join(county_lookup, on='7_county')

# Restrict to the years being compared. The explicit .copy() makes the column
# assignment below operate on an independent frame instead of a filtered
# slice (which triggers SettingWithCopyWarning).
years_of_interest = [2007, 2008, 2009, 2010, 2014, 2023, 2024]
df = df[df['YEAR(date)'].isin(years_of_interest)].copy()

# Build a first-of-month timestamp per row in one vectorised call instead of
# constructing datetime objects row by row.
df['date'] = pd.to_datetime(
    pd.DataFrame({
        'year': df['YEAR(date)'],
        'month': df['MONTH(date)'],
        'day': 1,
    })
)

# Sum only the count column (not the string county column or the year/month
# numbers). Result: one row per month-start date, one column per region.
df = df.groupby(['date', 'region'])['COUNT(`tx_id`)'].sum().unstack()


In [22]:
# London's monthly transaction counts: month number on the x axis, one colour
# per calendar year.
month_starts = df.index
fig = px.line(
    x=month_starts.month,
    y=df['London'],
    color=month_starts.year,
)
fig.show()

In [23]:
# Price bands of 250,000 covering 0 .. 3,499,999 (both bounds inclusive).
# The bounds are computed once so the DataFrame labels and the SQL CASE
# expressions cannot drift apart.
bucket_width = 250000
bucket_bounds = [(low, low + bucket_width - 1) for low in range(0, 3500000, bucket_width)]

# DataFrame column labels: 'Year', '7_county', then one "low-high" per band.
range_list = [f"{low}-{high}" for low, high in bucket_bounds]
cols = ['Year', '7_county', *range_list]

# One conditional COUNT per band. The alias now has a balanced parenthesis;
# aliases are not used downstream because the frame is labelled from `cols`.
count_strings = [
    f"count(case when `price_gbp` between {low} and {high} then 1 end) `bucket({low}-{high})`"
    for low, high in bucket_bounds
]

sql_cols = ['YEAR(date)', '7_county'] + count_strings

# Joined outside the f-string: nesting the same quote character inside an
# f-string expression is a SyntaxError before Python 3.12.
select_list = ", ".join(sql_cols)
query = f"SELECT {select_list} FROM Sep24pricepaidcomplete WHERE MONTH(date) < 7 GROUP BY YEAR(date), 7_county"
result = pd.DataFrame(sql_extractdata(query, **db_args), columns=cols)

# Left-join the county -> region lookup onto the query result.
county_lookup = pd.read_csv('county_to_region.csv', index_col='7_county', dtype=str)
df = result.join(county_lookup, on='7_county')


In [24]:
# Remove the county key column. Rebinding instead of inplace=True, together
# with errors='ignore', keeps this cell safe to run more than once (the
# in-place version raises KeyError on a second run).
df = df.drop(columns='7_county', errors='ignore')

In [25]:
# Per-year bucket counts for London and for the South East.
# Both tables are aggregated by year so they share the same shape: one row per
# year, sorted by year. For London this also covers the case where the region
# maps from more than one county and gives a deterministic row order, which the
# later `x.iloc[0]` normalisation relies on.
df_Lon = df.loc[df.region == 'London'].drop('region', axis=1).set_index('Year')
df_Lon = df_Lon.groupby(df_Lon.index).sum()

df_SE = df.loc[df.region == 'South East'].drop('region', axis=1).set_index('Year')
df_SE = df_SE.groupby(df_SE.index).sum()

In [26]:
# London bucket counts for years after 2018, each bucket expressed relative to
# its value in the first remaining year. The index is sorted first because the
# baseline is taken positionally (iloc[0]) and the row order otherwise depends
# on the order in which the SQL query returned its rows.
lon_recent = df_Lon[df_Lon.index > 2018].sort_index()
lon_indexed = lon_recent.apply(lambda bucket: bucket / bucket.iloc[0])

# Transposed so price buckets run along the x axis with one line per year.
fig = px.line(lon_indexed.transpose())
fig.show()

In [27]:
# South East bucket counts for years after 2018, each bucket divided by its
# value in the first remaining row, then transposed so price buckets run along
# the x axis with one line per year.
se_recent = df_SE[df_SE.index > 2018]
se_indexed = se_recent.apply(lambda bucket: bucket / bucket.iloc[0])
fig = px.line(se_indexed.T)
fig.show()