In [2]:
import pandas as pd
import numpy as np
import altair as alt
import geopandas as gpd
import eco_style
alt.themes.enable("light")

ThemeRegistry.enable('light')

# Trade in Services

In [2]:
df = pd.read_excel('servicetypebycountry2024q4.xlsx', sheet_name='Sheet 1. Time Series', skiprows=2)
df = df.query("`Country` == 'France'")
df = df[["Direction", "Service type code", "Service type", "2023"]]
df = df.rename(columns={"2023": "value"})
full_df = df.copy()


In [59]:

df = full_df.copy()

df['value'] = pd.to_numeric(df['value'], errors='coerce')

# cols: Service type code, Service type, Export, Import
df = df.pivot_table(index=["Service type code", "Service type"], columns="Direction", values="value", fill_value=0).reset_index()

df['Service type code'] = pd.to_numeric(df['Service type code'], errors='coerce')
# is the serivce type code an integer?
df['is_top_level'] = df['Service type code'].apply(lambda x: x.is_integer() if pd.notnull(x) else False)
df = df[df['is_top_level']]
df['total'] = df['Exports'] + df['Imports']
df['balance'] = df['Exports'] - df['Imports']
df = df.sort_values(by='total', ascending=False)

total = df.iloc[0]

df = df.iloc[1:]

df['pct'] = df['total'] / total['total'] * 100
df['group'] = df['Service type']
df.loc[df.iloc[6:].index, 'group'] = 'Other'
df = df.groupby(['group']).sum().reset_index().drop(columns=['Service type code', 'Service type'])
df = df.sort_values(by='total', ascending=False)
df['group'].unique()

rebinds = {
    "Telecommunications, computer and information services": "Telecommunications & IT",
}

df['group'] = df['group'].replace(rebinds)
# df['group'] = df['group'].apply(lambda x: x.split('\n'))

df['Imports'] = -df['Imports'].astype(int)

df = df[['group', 'total', 'Imports', 'Exports', 'balance', 'pct']]
df['balance'] = df['balance'].astype(int)

group_order = df.sort_values(by='balance', ascending=False)['group'].tolist()
df = df.melt(id_vars=['group', 'total', 'balance', 'pct'], var_name='Direction', value_name='value')

#df['Direction'] = df['Direction'].replace({'Imports': 'France to UK', 'Exports': 'UK to France'})

base = alt.Chart(df).encode(
    y=alt.Y('group:N', title=None, sort=group_order),
    x=alt.X('value:Q', title='', axis=alt.Axis(labelExpr="'£' + format(abs(datum.value), ',.0f')")),
    color=alt.Color('Direction:N', title=None),
    tooltip=[
        alt.Tooltip('group:N', title='Service Type'),
        alt.Tooltip('total:Q', title='Total Value'),
        alt.Tooltip('pct:Q', title='Percentage of Total (%)')
    ]
)

bars = base.mark_bar()

balance_points = base.transform_filter(alt.datum.Direction == 'Exports').mark_point(
    filled=True,
    size=100,
    color='black',
    shape='cross'
).encode(
    x=alt.X('balance:Q', title='Balance'),
    y=alt.Y('group:N', title=None, sort=group_order),
    color=alt.value('black'),
    tooltip=[
        alt.Tooltip('group:N', title='Service Type'),
        alt.Tooltip('balance:Q', title='Balance Value')
    ]
)    

chart = (bars + balance_points).properties(
    height=400,
    width=300,
)

chart.save("france_gbr_service_type_balance.json")
chart.save("france_gbr_service_type_balance.png", scale_factor=2)   
chart





  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# French people in the UK

In [3]:
gdf = gpd.read_file("Local_Authority_Districts_December_2021_GB_BUC_2022_8726906374457439708.geojson")

gdf['UK_country'] = gdf['LAD21CD'].str[:1]
full_gdf = gdf.copy()
full_gdf = full_gdf.to_crs("EPSG:4326")
if full_gdf.crs is None:
    full_gdf.set_crs("EPSG:27700", inplace=True)  # or whatever it originally is (e.g., British National Grid)

# Convert to WGS84 for plotting if needed
full_gdf = full_gdf.to_crs("EPSG:4326")




In [4]:
df = pd.read_excel("country_of_birth_2021_TS012-2021-2.xlsx")
df.columns = ["id", "LAD", "code", "country", "pop"]
df = df.groupby(["id", "LAD",]).sum().reset_index()
eng_pop_df = df[["id", "pop"]]
eng_pop_df

df = pd.read_excel("country_of_birth_2021_TS012-2021-2.xlsx")
df.columns = ["id", "LAD", "code", "country", "value"]

df = df[df['country'].str.contains('France')]

df = df.merge(eng_pop_df, on='id', how='left')
df['pct'] = df['value'] / df['pop'] 

eng_df = df.copy()

In [5]:
df = pd.read_excel("scotland_by_country_of_birthtable_2025-05-27_17-10-15.xlsx", skiprows=10).iloc[1:, 1:]
df.columns = ['LAD'] + df.columns[1:].to_list()
df = df[['LAD']+[c for c in df.columns if 'France' in c or c=="All people"]]
df.columns = ['LAD', 'pop', 'value',]
df = df.dropna(subset=['value'])
df['pct'] = df['value'] / df['pop']
scot_df = df.copy()

In [6]:
df = pd.concat([eng_df, scot_df], ignore_index=True)
df = df[['LAD', 'id', 'value', 'pop', 'pct']]
df = df.merge(gdf[['LAD21NM', 'LAD21CD']], left_on='LAD', right_on='LAD21NM', how='left')
df['id'] = df['id'].fillna(df.LAD21CD)
df = df[['LAD', 'id', 'value', 'pop', 'pct']]
df

Unnamed: 0,LAD,id,value,pop,pct
0,Hartlepool,E06000001,15.0,92340.0,0.000162
1,Middlesbrough,E06000002,72.0,143925.0,0.000500
2,Redcar and Cleveland,E06000003,49.0,136532.0,0.000359
3,Stockton-on-Tees,E06000004,85.0,196583.0,0.000432
4,Darlington,E06000005,60.0,107807.0,0.000557
...,...,...,...,...,...
358,East Dunbartonshire,S12000045,120.0,108937.0,0.001102
359,Fife,S12000047,481.0,371781.0,0.001294
360,Perth and Kinross,S12000048,200.0,150953.0,0.001325
361,Glasgow City,S12000049,1688.0,620756.0,0.002719


In [7]:
uk_base_gdf = gpd.read_file("UK_Lads.geoJson")
uk_base_gdf['country'] = uk_base_gdf['LAD21CD'].str[:1]
#exclude NI
uk_base_gdf = uk_base_gdf[uk_base_gdf['country'] != 'N']
uk_base_map = alt.Chart(uk_base_gdf).mark_geoshape(
    fill='rgb(240,240,240)',
    stroke='rgb(225,225,225)',
    strokeWidth=0.5
)    

uk_base_map

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [8]:

gdf = full_gdf.copy().merge(df, left_on="LAD21CD", right_on="id", how="left").drop(columns=["LAD"])


chart = alt.Chart(gdf.dropna(subset=['pct', 'LAT', 'LONG'])).mark_circle(
    stroke='black',
    strokeWidth=0.1,
    opacity=0.8,
).encode(
    latitude='LAT',
    longitude='LONG',
    color=alt.Color('pct:Q', 
                    scale=alt.Scale(range=["#e0f7f6", "#80dad7", "#36B7B4"]),
                    legend=alt.Legend(
                        title="",
                        format='%'
                    ),
                    title=''),
    size=alt.Size('value:Q', title='', 
                  legend=alt.Legend(labelExpr="format(datum.value, ',.0f') + (datum.value == 15000 ? ' Britons' : '')")),  
    # tooltip=[
    #     alt.Tooltip('code:N', title='Code'),
    #     alt.Tooltip('nominal:Q', title='Number of Britons'),
    #     alt.Tooltip('pct:Q', title='Percentage of Population')
    # ]
)

chart = (uk_base_map + chart)
chart.save("French_in_Britain.json")
chart.save("French_in_Britain.png", scale_factor=4)
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

# FDI

In [149]:
gdf


Unnamed: 0,OBJECTID,LAD21CD,LAD21NM,LAD21NMW,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry,UK_country,id,value,pop,pct
0,1,E06000001,Hartlepool,,447160,531474,-1.27018,54.67614,b3bda806-6f05-4b60-b7f9-e695b3c770f4,"POLYGON ((-1.24224 54.72297, -1.24194 54.72272...",E,E06000001,15.0,92340.0,0.000162
1,2,E06000002,Middlesbrough,,451141,516887,-1.21099,54.54467,9e361604-f9c3-4c0f-9acd-37eb3a62900a,"POLYGON ((-1.1986 54.58287, -1.16664 54.55423,...",E,E06000002,72.0,143925.0,0.000500
2,3,E06000003,Redcar and Cleveland,,464361,519597,-1.00608,54.56752,22633e34-071d-4d40-afc7-a6263ced0f47,"POLYGON ((-0.79189 54.55824, -0.80042 54.55101...",E,E06000003,49.0,136532.0,0.000359
3,4,E06000004,Stockton-on-Tees,,444940,518183,-1.30664,54.55691,f210d40d-5db4-4d4b-a9c8-f7b7f8b2b1b8,"POLYGON ((-1.19319 54.62905, -1.20018 54.6235,...",E,E06000004,85.0,196583.0,0.000432
4,5,E06000005,Darlington,,428029,515648,-1.56835,54.53534,35efd5a3-5c86-4c7d-8f2c-7dfbce2bdaf7,"POLYGON ((-1.43836 54.59508, -1.42333 54.60313...",E,E06000005,60.0,107807.0,0.000557
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,359,W06000020,Torfaen,Torfaen,327459,200480,-3.05101,51.69836,3a825cda-a72b-4380-a925-576af958cbaa,"POLYGON ((-2.95891 51.62878, -2.98933 51.62876...",W,W06000020,30.0,92275.0,0.000325
359,360,W06000021,Monmouthshire,Sir Fynwy,337812,209231,-2.90280,51.77827,fb9492e1-ac81-4e30-a971-9ac04e16ddb7,"POLYGON ((-2.6504 51.82612, -2.66087 51.82275,...",W,W06000021,94.0,92959.0,0.001011
360,361,W06000022,Newport,Casnewydd,337897,187432,-2.89769,51.58231,a3e9729e-823b-462f-bf2c-6855d1175d5a,"POLYGON ((-2.8222 51.55392, -2.86658 51.54242,...",W,W06000022,133.0,159593.0,0.000833
361,362,W06000023,Powys,Powys,302329,273255,-3.43531,52.34864,e3f0d307-4ed4-41f0-8b99-d12233f66bab,"POLYGON ((-3.1475 52.89017, -3.13552 52.88501,...",W,W06000023,135.0,133165.0,0.001014


# UK ppl in France

In [9]:
gdf = gpd.read_file('communes-version-simplifiee.geojson')
gdf['lat'] = gdf.geometry.centroid.y
gdf['lon'] = gdf.geometry.centroid.x


  gdf['lat'] = gdf.geometry.centroid.y

  gdf['lon'] = gdf.geometry.centroid.x


In [10]:
df = pd.read_csv("britons_in_france_2016.csv", skiprows=0, dtype={
    'code': str,
    'nominal': str,
    'pct': str
})
df['nominal'] = df['nominal'].str.replace('.', '', regex=False)

df['nominal'] = df['nominal'].str.replace(',', '', regex=False)
df['nominal'] = pd.to_numeric(df['nominal'], errors='coerce')
df['nominal'] = df['nominal'].fillna(0)
df['pct'] = pd.to_numeric(df['pct'], errors='coerce')
df['pct'] = df['pct'].fillna(0)
df['pct'] = df['pct']/100
df = df.dropna()
df = df.sort_values(by='nominal', ascending=False)


In [11]:
gdf = gdf.merge(df, left_on='code', right_on='id', how='left')
gdf

Unnamed: 0,code,nom,geometry,lat,lon,id,name,nominal,pct
0,01073,Ceyzérieu,"POLYGON ((5.69816 45.86166, 5.70471 45.86125, ...",45.833240,5.718654,,,,
1,01262,Montluel,"POLYGON ((5.06729 45.88115, 5.0737 45.87243, 5...",45.890347,5.020370,,,,
2,01425,Tranclière,"POLYGON ((5.23549 46.10047, 5.23991 46.11296, ...",46.097333,5.260867,,,,
3,02042,Azy-sur-Marne,"POLYGON ((3.34368 48.99501, 3.33626 48.99923, ...",49.004719,3.359886,,,,
4,02140,Camelin,"POLYGON ((3.09633 49.5179, 3.12117 49.52097, 3...",49.521453,3.129031,,,,
...,...,...,...,...,...,...,...,...,...
35223,90017,Bourogne,"POLYGON ((6.88448 47.58092, 6.89641 47.58154, ...",47.563536,6.906436,,,,
35224,92025,Colombes,"POLYGON ((2.27331 48.92685, 2.25741 48.91356, ...",48.922515,2.246751,,,,
35225,95149,Chaumontel,"POLYGON ((2.4354 49.13394, 2.46131 49.13661, 2...",49.127836,2.434173,,,,
35226,95450,Neuville-sur-Oise,"POLYGON ((2.07169 49.00171, 2.06443 49.00821, ...",49.016738,2.063785,,,,


In [12]:
base_gdf = gpd.read_file("departements.geojson")
base_map = alt.Chart(base_gdf).mark_geoshape(
    fill='rgb(240,240,240)',
    stroke='rgb(225,225,225)',
    strokeWidth=0.5
)

In [14]:
chart = alt.Chart(gdf.dropna(subset=['pct', 'lat', 'lon'])).mark_circle(
    stroke='black',
    strokeWidth=0.1,
    opacity=0.8,
).encode(
    latitude='lat',
    longitude='lon',
    color=alt.Color('pct:Q', 
                    scale=alt.Scale(range=["#e0f7f6", "#80dad7", "#36B7B4"]),
                    legend=alt.Legend(
                        title="",
                        labelExpr="(datum.value == 0.08 ? ['8% born', 'in the UK'] : datum.label)",
                        format='%'
                    ),
                    title=''),
    size=alt.Size('nominal:Q', title='', 
                  scale=alt.Scale(type='linear', rangeMax=1000),
                  legend=alt.Legend(labelExpr="format(datum.value, ',.0f') + (datum.value == 15000 ? '' : '')")),  
    tooltip=[
        alt.Tooltip('code:N', title='Code'),
        alt.Tooltip('nominal:Q', title='Number of Britons'),
        alt.Tooltip('pct:Q', title='Percentage of Population')
    ]
)

chart = (base_map+chart).properties(
    width=300,
    height=300
)

chart.save("britons_in_france_map.json")
chart.save("britons_in_france_map.png", scale_factor=5)
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

# FDI

In [185]:
df = pd.read_excel("publicationtablesoutward2023.xlsx", sheet_name="3.1", skiprows=3)
df = df.iloc[:, 2:]
df = df.query("Economy == 'France'")
df = df.melt(id_vars=['Economy'], var_name='date', value_name='nominal')
df['date'] = df['date'].str[:4]
df['date'] = pd.to_datetime(df['date'], format='%Y')
df['direction'] = "Outward"
outward_df = df.copy()

df = pd.read_excel("publicationtablesinward2023.xlsx", sheet_name="3.1", skiprows=3)
df = df.iloc[:, 2:]
df = df.query("Economy == 'France'")
df = df.melt(id_vars=['Economy'], var_name='date', value_name='nominal')
df['date'] = df['date'].str[:4]
df['date'] = pd.to_datetime(df['date'], format='%Y')
df['direction'] = "Inward"
inward_df = df.copy()

df = pd.read_csv("series-280525.csv", skiprows=7)
df.columns = ['date', 'gdp']
df = df[df.date.apply(lambda x: len(x) == 4)]
df['date'] = pd.to_datetime(df['date'], format='%Y')
df['gdp'] = pd.to_numeric(df['gdp'], errors='coerce')
gdp_df = df.copy()

df = pd.concat([outward_df, inward_df], ignore_index=True)
df = df.merge(gdp_df, on='date', how='left')
df['value'] = df['nominal'] / df['gdp'] 
df

df['label'] = np.where(df.date == df.date.max(), df.direction, "")


In [194]:
base = alt.Chart(df).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title='', axis=alt.Axis(format='%',
                                               labelExpr="datum.value == 0.055 ? ['5.5% of UK GDP'] : datum.label"
                                               
                                               )),
    color=alt.Color('direction:N', title='', legend=None),
)

lines = base.mark_line()

end_labels = base.mark_text(
    align='left',
    baseline='middle',
    dx=5,
    fontSize=12,
).encode(
    text=alt.Text('label:N', title=None),
)

chart = (lines + end_labels).properties(
    width=500,
    height=350,
)

chart.save("uk_france_FDI_pct_of_gdp.json")
chart.save("uk_france_FDI_pct_of_gdp.png", scale_factor=2)
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [195]:
df

Unnamed: 0,Economy,date,nominal,direction,gdp,value,label
0,France,2014-01-01,57030,Outward,1862514,0.03062,
1,France,2015-01-01,60488,Outward,1916451,0.031563,
2,France,2016-01-01,71579,Outward,1991645,0.03594,
3,France,2017-01-01,77592,Outward,2082482,0.037259,
4,France,2018-01-01,82881,Outward,2152304,0.038508,
5,France,2019-01-01,76100,Outward,2233921,0.034066,
6,France,2020-01-01,86327,Outward,2103486,0.04104,
7,France,2021-01-01,92639,Outward,2285400,0.040535,
8,France,2022-01-01,91573,Outward,2526428,0.036246,
9,France,2023-01-01,101205,Outward,2711185,0.037329,Outward


### France-UK GDP comp

In [196]:
UK_gdp_2024_gbp = 2850989* 10**6  # in GBP
france_gdp_2024_eur = 2921411.8 * 10**6  # in EUR
avg_gbp_to_eur_2024 = 1.1815  # Average exchange rate for 2024
france_gdp_2024_gbp = france_gdp_2024_eur / avg_gbp_to_eur_2024  # Convert France GDP to GBP

print(f"UK GDP 2024: {UK_gdp_2024_gbp / 10**9:.2f} billion GBP")
print(f"France GDP 2024: {france_gdp_2024_gbp / 10**9:.2f} billion GBP")


UK GDP 2024: 2850.99 billion GBP
France GDP 2024: 2472.63 billion GBP


# France-UK Productivty comp

In [197]:
#outputperhourworkedmay2025.xlsx
uk_prod_index_2024 = 98.8
uk_prod_index_2007 = 93.4
uk_prod_index_growth = (uk_prod_index_2024 - uk_prod_index_2007) / uk_prod_index_2007 * 100
print(f"UK production index growth from 2007 to 2024: {uk_prod_index_growth:.2f}%")

#https://data.ecb.europa.eu/data/datasets/MNA/MNA.A.N.FR.W0.S1.S1._Z.LPR_HW._Z._T._Z.IX.LR.N
france_prod_index_2024 = 97.7
france_prod_index_2007 = 92.88
france_prod_index_growth = (france_prod_index_2024 - france_prod_index_2007) / france_prod_index_2007 * 100
print(f"France production index growth from 2007 to 2024: {france_prod_index_growth:.2f}%")

UK production index growth from 2007 to 2024: 5.78%
France production index growth from 2007 to 2024: 5.19%


# Real Wages

In [201]:
# https://data-explorer.oecd.org/vis?tm=average%20annual%20wage&pg=0&snb=26&vw=tb&df[ds]=dsDisseminateFinalDMZ&df[id]=DSD_EARNINGS%40AV_AN_WAGE&df[ag]=OECD.ELS.SAE&df[vs]=1.0&dq=GBR%2BFRA......&pd=2000%2C&to[TIME_PERIOD]=false

# UK average annual wages, GBP, constant prices
uk_average_annual_wage_2023 = 43075  # GBP
uk_average_annual_wage_2007 = 43086

# France
france_average_annual_wage_2023 = 43592  # EUR
france_average_annual_wage_2007 = 39924  # EUR

print(f"UK annual wage growth from 2007 to 2023: {(uk_average_annual_wage_2023 - uk_average_annual_wage_2007) / uk_average_annual_wage_2007 * 100:.2f}%")
print(f"France annual wage growth from 2007 to 2023: {(france_average_annual_wage_2023 - france_average_annual_wage_2007) / france_average_annual_wage_2007 * 100:.2f}%")

UK annual wage growth from 2007 to 2023: -0.03%
France annual wage growth from 2007 to 2023: 9.19%


In [72]:
df.sort_values('nominal')

Unnamed: 0,id,name,nominal,pct
1663,976,Mayotte,0.0,0.000
599,35049,Cancale,0.0,0.001
600,35068,Châteaubourg,0.0,0.000
601,35069,Châteaugiron,0.0,0.001
1142,63047,La Bourboule,0.0,0.001
...,...,...,...,...
330,24037,Bergerac,909.0,0.011
1589,87126,Rochechouart,922.0,0.064
1563,86078,Civray,925.0,0.071
318,22266,Rostrenen,945.0,0.047


In [20]:
gdf

Unnamed: 0,code,nom,geometry,id,name,nominal,pct
0,01073,Ceyzérieu,"POLYGON ((5.69816 45.86166, 5.70471 45.86125, ...",,,,
1,01262,Montluel,"POLYGON ((5.06729 45.88115, 5.0737 45.87243, 5...",,,,
2,01425,Tranclière,"POLYGON ((5.23549 46.10047, 5.23991 46.11296, ...",,,,
3,02042,Azy-sur-Marne,"POLYGON ((3.34368 48.99501, 3.33626 48.99923, ...",,,,
4,02140,Camelin,"POLYGON ((3.09633 49.5179, 3.12117 49.52097, 3...",,,,
...,...,...,...,...,...,...,...
35223,90017,Bourogne,"POLYGON ((6.88448 47.58092, 6.89641 47.58154, ...",,,,
35224,92025,Colombes,"POLYGON ((2.27331 48.92685, 2.25741 48.91356, ...",,,,
35225,95149,Chaumontel,"POLYGON ((2.4354 49.13394, 2.46131 49.13661, 2...",,,,
35226,95450,Neuville-sur-Oise,"POLYGON ((2.07169 49.00171, 2.06443 49.00821, ...",,,,


In [2]:
full_df = pd.read_parquet("RP2021_indreg.parquet")

In [None]:
df = full_df.copy()

In [None]:
df.c

AttributeError: 'DataFrame' object has no attribute 'PAYS_NAIS'

In [6]:
df.PNAI12.unique()

array(['01', '12', '05', '10', '06', '07', '04', '02', '08', '03', '09',
       '11'], dtype=object)

In [5]:
list(full_df.columns)

['REGION',
 'NUMMR',
 'ACHLR',
 'AEMM',
 'AEMMR',
 'AGED',
 'AGER20',
 'AGEREV',
 'AGEREVQ',
 'ANAI',
 'ANARR',
 'ANEM',
 'ANEMR',
 'APAF',
 'ARRIVR',
 'ASCEN',
 'BAIN',
 'BATI',
 'CATL',
 'CATPC',
 'CHAU',
 'CHFL',
 'CHOS',
 'CLIM',
 'CMBL',
 'COUPLE',
 'CS1',
 'CS2',
 'CS3',
 'CUIS',
 'DEPT',
 'DEROU',
 'DIPL',
 'EAU',
 'EGOUL',
 'ELEC',
 'EMPL',
 'EPCI',
 'ETUD',
 'GARL',
 'HLML',
 'ILETUD',
 'ILETUU',
 'ILT',
 'ILTUU',
 'IMMI',
 'INAI',
 'INAT',
 'INFAM',
 'INPER',
 'INPERF',
 'IPONDI',
 'IRAN',
 'IRANUU',
 'LIENF',
 'LPRF',
 'LPRM',
 'METRODOM',
 'MOCO',
 'MOCO_DET',
 'RECOMPOSEE',
 'TAF',
 'MODV',
 'NA38',
 'NA88',
 'NAF08',
 'NAIDT',
 'NAT13',
 'NAT49',
 'NATC',
 'NATN12',
 'NATN49',
 'NATNC',
 'NBPI',
 'NPERR',
 'NUMF',
 'ORIDT',
 'PNAI12',
 'PROF',
 'RECH',
 'SANI',
 'SANIDOM',
 'SEXE',
 'SFM',
 'STAT',
 'STAT_CONJ',
 'STOCD',
 'SURF',
 'TACT',
 'TACTD16',
 'TP',
 'TRANS',
 'TYPC',
 'TYPFC',
 'TYPL',
 'TYPMD',
 'TDM8',
 'TYPMR',
 'UR',
 'VOIT',
 'WC',
 'MNAI']

In [8]:
print(df.shape)

NameError: name 'df' is not defined

In [110]:
df = full_df.copy()
df = df.query("wstatus == 'POP'")
df.c_birth.value_counts()

c_birth
TOTAL             209844
NAT               195911
FOR               194194
NEU27_2020_FOR    175620
EU27_2020_FOR     172080
NRP                70563
Name: count, dtype: int64

In [112]:
df

Unnamed: 0,DATAFLOW,LAST UPDATE,freq,unit,wstatus,c_birth,sex,age,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG,CONF_STATUS
2985003,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,EU27_2020_FOR,F,Y15-24,AT,1999,13.0,,
2985004,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,EU27_2020_FOR,F,Y15-24,AT,2000,16.8,,
2985005,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,EU27_2020_FOR,F,Y15-24,AT,2001,17.8,,
2985006,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,EU27_2020_FOR,F,Y15-24,AT,2002,12.4,,
2985007,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,EU27_2020_FOR,F,Y15-24,AT,2003,17.4,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4003210,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,TOTAL,T,Y_GE15,UKN0,2015,1460.9,,
4003211,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,TOTAL,T,Y_GE15,UKN0,2016,1467.0,,
4003212,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,TOTAL,T,Y_GE15,UKN0,2017,1469.9,,
4003213,ESTAT:LFST_R_LFSD2PWC(1.0),14/04/25 23:00:00,A,THS_PER,POP,TOTAL,T,Y_GE15,UKN0,2018,1472.8,,


In [39]:
df = pd.read_excel("country_of_birth_2021_TS012-2021-2.xlsx")
df

Unnamed: 0,Lower tier local authorities Code,Lower tier local authorities,Country of birth (60 categories) Code,Country of birth (60 categories),Observation
0,E06000001,Hartlepool,-8,Does not apply,0
1,E06000001,Hartlepool,1,Europe: United Kingdom: England,87403
2,E06000001,Hartlepool,2,Europe: United Kingdom: Northern Ireland,160
3,E06000001,Hartlepool,3,Europe: United Kingdom: Scotland,867
4,E06000001,Hartlepool,4,Europe: United Kingdom: Wales,242
...,...,...,...,...,...
19855,W06000024,Merthyr Tydfil,55,Antarctica and Oceania: Australasia: Australia,28
19856,W06000024,Merthyr Tydfil,56,Antarctica and Oceania: Australasia: New Zealand,4
19857,W06000024,Merthyr Tydfil,57,Antarctica and Oceania: Australasia: Other Aus...,0
19858,W06000024,Merthyr Tydfil,58,Antarctica and Oceania: Other Oceania and Anta...,0


In [114]:
df.sort_values(by='balance')

Unnamed: 0,group,total,balance,pct,Direction,value
1,Travel,9339.0,-4803,23.173122,Imports,-7071.0
8,Travel,9339.0,-4803,23.173122,Exports,2268.0
4,Other,3440.0,-456,8.535768,Imports,-1948.0
11,Other,3440.0,-456,8.535768,Exports,1492.0
5,Transportation,3301.0,69,8.190864,Imports,-1616.0
12,Transportation,3301.0,69,8.190864,Exports,1685.0
0,Other Business Services,11286.0,636,28.004268,Imports,-5325.0
7,Other Business Services,11286.0,636,28.004268,Exports,5961.0
6,Intellectual property,2288.0,818,5.677278,Imports,-735.0
13,Intellectual property,2288.0,818,5.677278,Exports,1553.0


In [82]:
full_df.query("`Service type` == 'Other Business Services'")

Unnamed: 0,Direction,Service type code,Service type,value
3513,Exports,10,Other Business Services,5961
9572,Imports,10,Other Business Services,5325


In [89]:
df

Unnamed: 0,group,total,balance,pct,Direction,value
0,[Other Business Services],11286.0,636.0,28.004268,Imports,-5325.0
1,[Travel],9339.0,-4803.0,23.173122,Imports,-7071.0
2,[Financial],6436.0,3926.0,15.969827,Imports,-1255.0
3,"[Telecommunications, computer, & IT]",4211.0,2669.0,10.448872,Imports,-771.0
4,[Other],3440.0,-456.0,8.535768,Imports,-1948.0
5,[Transportation],3301.0,69.0,8.190864,Imports,-1616.0
6,[Intellectual property],2288.0,818.0,5.677278,Imports,-735.0
7,[Other Business Services],11286.0,636.0,28.004268,Exports,5961.0
8,[Travel],9339.0,-4803.0,23.173122,Exports,2268.0
9,[Financial],6436.0,3926.0,15.969827,Exports,5181.0


In [85]:
full_df.query("`Service type` == 'Financial'")

Unnamed: 0,Direction,Service type code,Service type,value
2517,Exports,7,Financial,5181
8576,Imports,7,Financial,1255


In [50]:
df.iloc[:1].total.sum()

np.float64(43715.0)

In [43]:
full_df.query("`Service type` == 'Construction'")

Unnamed: 0,Direction,Service type code,Service type,value
1853,Exports,5,Construction,45
7912,Imports,5,Construction,C


In [38]:
df['Service type code'].iloc[0].str.contains(".")  # Check if the first code contains a dot

AttributeError: 'str' object has no attribute 'str'

In [12]:
df['Service account']

0                                        0 Total Services
1                                        0 Total Services
2                                        0 Total Services
3                                        0 Total Services
4                                        0 Total Services
                              ...                        
5954                              8 Intellectual property
5955                              8 Intellectual property
5956    9 Telecommunications, computer and information...
5957    9 Telecommunications, computer and information...
5958    9 Telecommunications, computer and information...
Name: Service account, Length: 5959, dtype: object