## MPs and Constituency Mentions Analysis

### Importing Libraries

In [2]:
import pandas as pd
from collections import defaultdict
import re

### Reading Data

In [3]:
# Input files: the speeches table and the constituency lookup table.
file1_path = 'hansard-speeches-v310.csv'
file2_path = 'constitu.csv'

# NOTE(review): the warning this cell emitted points at the read_csv call for
# the speeches file and is presumably pandas' mixed-type DtypeWarning.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which removes that source of inconsistent dtypes.
df1 = pd.read_csv(file1_path, low_memory=False)
df2 = pd.read_csv(file2_path)

  df1 = pd.read_csv(file1_path)


### Data Preprocessing

In [8]:
# Give the PCON21NM column of the lookup table the readable name
# 'Constituency'.
constituency_column_names = {'PCON21NM': 'Constituency'}
df2 = df2.rename(columns=constituency_column_names)

In [9]:

# Lower-case the constituency names in both tables (df1: 'constituency',
# df2: 'Constituency') so that they compare equal regardless of casing.
for frame, name_column in ((df1, 'constituency'), (df2, 'Constituency')):
    frame[name_column] = frame[name_column].str.lower()


### Counting Mentions and Grouping

In [11]:
# Normalise the speech text for matching: lower-case it, then strip
# punctuation and digits.
# regex=True is required: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, so without it neither pattern would
# ever match and the text would be left unchanged. Raw strings keep \w, \s and
# \d from being read as (invalid) string escapes.
# NOTE(review): after this step punctuation inside names (e.g. hyphens) is
# gone from speech_cleaned, so name matching must allow for that.
df1['speech_cleaned'] = (
    df1['speech']
    .str.lower()
    .str.replace(r'[^\w\s]', '', regex=True)
    .str.replace(r'\d+', '', regex=True)
)

In [12]:

# Group by MP, Party, Year, and Constituency
df1_grouped = df1.groupby(['display_as', 'party', 'year', 'constituency'])

# Maps (mp, party, year, constituency) -> total number of mentions.
count_mentions = defaultdict(int)

# The generic phrase is identical for every group, so compile it once.
my_constituency_re = re.compile(r'\bmy constituency\b')

for (mp, party, year, constituency), group in df1_grouped:
    # Match the name verbatim and also with punctuation removed: the cleaned
    # speech text is meant to have punctuation stripped, so a hyphenated or
    # comma-separated name may only appear there in its stripped form.
    name_variants = {constituency, re.sub(r'[^\w\s]', '', constituency)}
    name_variants.discard('')  # an empty alternative would match at every word boundary

    name_re = None
    if name_variants:
        # re.escape keeps characters such as '.', '(' or '-' in a name from
        # being interpreted as regex syntax. Longest variant first so the
        # alternation prefers the fuller form.
        alternatives = '|'.join(
            re.escape(variant)
            for variant in sorted(name_variants, key=len, reverse=True)
        )
        name_re = re.compile(rf'\b(?:{alternatives})\b')

    total = 0
    # dropna(): a missing speech is NaN (a float), which re cannot search.
    for speech in group['speech_cleaned'].dropna():
        # Count the mentions of the constituency by name or "my constituency"
        if name_re is not None:
            total += len(name_re.findall(speech))
        total += len(my_constituency_re.findall(speech))

    # Assign even when the total is zero so every group gets an entry.
    count_mentions[(mp, party, year, constituency)] += total


### Creating the Final DataFrame

In [15]:
# Rename the coordinate columns of the lookup table in one pass:
#   LONG -> lon_con
#   LAT  -> lat_con
coordinate_column_names = {'LONG': 'lon_con', 'LAT': 'lat_con'}
df2 = df2.rename(columns=coordinate_column_names)

In [19]:

# Convert the dictionary to a DataFrame: one row per
# (MP, Party, Year, Constituency) key with its mention count.
df_mentions = pd.DataFrame(
    [(*key, n_mentions) for key, n_mentions in count_mentions.items()],
    columns=['MP', 'Party', 'Year', 'Constituency', 'No_of_mentions'],
)

# Handle party changes: an MP who sat for several parties within one year gets
# a combined label such as "A / B" ...
df_mentions['Party'] = (
    df_mentions
    .groupby(['MP', 'Year'])['Party']
    .transform(lambda parties: ' / '.join(parties.unique()))
)

# ... and the per-party counts of that year are summed into a single row.
# Without this step the MP keeps one row per original party under the same
# combined label, and a later drop_duplicates() could silently discard one of
# them whenever the two counts happen to be equal.
# sort=False keeps the rows in their original order.
df_mentions = (
    df_mentions
    .groupby(['MP', 'Party', 'Year', 'Constituency'], as_index=False, sort=False)['No_of_mentions']
    .sum()
)

# Merge with latitude and longitude. The lookup is reduced to one row per
# constituency first so the left join cannot multiply mention rows;
# validate= makes pandas raise if that assumption is ever violated.
constituency_coords = (
    df2[['Constituency', 'lat_con', 'lon_con']]
    .drop_duplicates(subset='Constituency')
)
df_final = pd.merge(
    df_mentions,
    constituency_coords,
    how='left',
    on='Constituency',
    validate='many_to_one',
)

# Rows are already unique per key after the aggregation above, so only the
# index needs normalising.
df_final = df_final.reset_index(drop=True)
df_final.head()


Unnamed: 0,MP,Party,Year,Constituency,No_of_mentions,lat_con,lon_con
0,Aaron Bell,Conservative,2020,newcastle-under-lyme,102,53.03604,-2.29439
1,Aaron Bell,Conservative,2021,newcastle-under-lyme,35,53.03604,-2.29439
2,Abena Oppong-Asare,Labour,2020,erith and thamesmead,27,51.49174,0.12906
3,Abena Oppong-Asare,Labour,2021,erith and thamesmead,6,51.49174,0.12906
4,Adam Afriyie,Conservative,2005,windsor,20,51.43494,-0.69987


In [20]:
# Write the mentions table to CSV; the DataFrame index is not written.
output_csv_path = 'MPs_Constituency_Mentions.csv'
df_final.to_csv(output_csv_path, index=False)

In [43]:
# Constituencies that have at least one row with a missing value,
# ordered from the most recent year backwards.
constituencies_with_nan = (
    df_final[df_final.isna().any(axis=1)]
    .sort_values(by=['Year'], ascending=False)
    .Constituency.unique()
)
print(f'Constituencies with missing values: {constituencies_with_nan}')

# Rows after 2010, computed once and reused by every check below.
df_after_2010 = df_final[df_final['Year'] > 2010]

# Unique constituencies in the lookup table (df2) ...
n_lookup_constituencies = df2.Constituency.nunique()

# ... in the post-2010 mentions ...
n_mentioned_constituencies = df_after_2010.Constituency.nunique()

# ... and those present in df2 but absent from the post-2010 mentions.
missing_constituencies = df2[
    ~df2.Constituency.isin(df_after_2010.Constituency)
].Constituency.unique()

print(f'Number of unique constituencies in df2: {n_lookup_constituencies}')
print(f'Number of unique constituencies in df_final after 2010: {n_mentioned_constituencies}')
print(
    f'Unique constituencies in df2 that are not in df_final after 2010 '
    f'({len(missing_constituencies)}): {missing_constituencies}'
)

# Last expression of the cell, so the notebook renders it: the post-2010 rows
# with the highest mention counts.
df_after_2010.sort_values(by=['No_of_mentions'], ascending=False).head(10)

Number of unique constituencies in df2: 650
Number of unique constituencies in df_final: 646
Number of unique constituencies in df2 that are not in df_final: ['belfast west' 'mid ulster' 'newry and armagh' 'west tyrone']


In [45]:
!pip install -q plotly dash pandas

In [50]:
# Keep only the rows whose Year is later than 2010.
is_after_2010 = df_final['Year'] > 2010
df_final = df_final.loc[is_after_2010]

In [52]:
import dash
from dash.dependencies import Input, Output
import plotly.express as px

# From Dash 2.0 on, dcc and html ship inside the main dash package; the
# standalone dash_core_components / dash_html_components packages are
# deprecated. Fall back to them only when an older Dash is installed.
try:
    from dash import dcc, html
except ImportError:
    import dash_core_components as dcc
    import dash_html_components as html


def _dropdown_options(column):
    """Return Dash dropdown options for the distinct values of a df_final column.

    Values are sorted so the dropdown is easy to scan, and converted with
    .tolist() to plain Python types so they serialise to JSON cleanly.
    """
    distinct_values = sorted(df_final[column].dropna().unique().tolist())
    return [{'label': str(value), 'value': value} for value in distinct_values]


# Initialize the Dash app
app = dash.Dash(__name__)

# Create a bar plot using Plotly Express
fig_bar = px.bar(df_final, x='MP', y='No_of_mentions', title='Number of Mentions by MP')

# Create a map using Plotly Express
fig_map = px.scatter_geo(df_final, lat='lat_con', lon='lon_con',
                         color='No_of_mentions', title='Constituencies Mentioned',
                         projection='natural earth')

# Define the layout of the app: three filter dropdowns (nothing selected
# initially), the two static figures, and a container for the data table.
app.layout = html.Div([
    html.H1('MPs and Constituency Mentions Analysis'),
    html.Label('Select MP:'),
    dcc.Dropdown(
        id='mp-dropdown',
        options=_dropdown_options('MP'),
        value=None
    ),
    html.Label('Select Party:'),
    dcc.Dropdown(
        id='party-dropdown',
        options=_dropdown_options('Party'),
        value=None
    ),

    html.Label('Select Year:'),
    dcc.Dropdown(
        id='year-dropdown',
        options=_dropdown_options('Year'),
        value=None
    ),
    dcc.Graph(id='bar-plot', figure=fig_bar),
    dcc.Graph(id='map', figure=fig_map),
    html.Div(id='data-table')
])

# Upper bound on rendered rows so a broad selection cannot flood the page.
MAX_TABLE_ROWS = 200


# Define callback to update data table
@app.callback(
    Output('data-table', 'children'),
    [Input('year-dropdown', 'value'),
     Input('mp-dropdown', 'value'),
     Input('party-dropdown', 'value')]
)
def update_table(selected_year, selected_mp, selected_party):
    """Render the rows of df_final that match the current dropdown selections.

    Args:
        selected_year: value of the year dropdown, or None when nothing is selected.
        selected_mp: value of the MP dropdown, or None when nothing is selected.
        selected_party: value of the party dropdown, or None when nothing is selected.

    Returns:
        An html.Table with a header row and at most MAX_TABLE_ROWS matching rows.
    """
    # Apply only the filters that are actually set. Comparing a column with
    # None matches nothing, which would leave the table empty until all three
    # dropdowns had a value.
    selections = {'Year': selected_year, 'MP': selected_mp, 'Party': selected_party}
    filtered_df = df_final
    for column, selected_value in selections.items():
        if selected_value is not None:
            filtered_df = filtered_df[filtered_df[column] == selected_value]

    visible_df = filtered_df.head(MAX_TABLE_ROWS)

    return html.Table([
        html.Thead([
            html.Tr([html.Th(col) for col in visible_df.columns])
        ]),
        html.Tbody([
            # Missing values (e.g. absent coordinates) are shown as empty cells.
            html.Tr([html.Td('' if pd.isna(cell) else cell) for cell in row])
            for row in visible_df.values
        ])
    ])

# Run the app
if __name__ == '__main__':
    # Dash.run is the current entry point and run_server is its deprecated
    # predecessor. Prefer run() and fall back to run_server() only when the
    # installed Dash does not provide it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)