The data is downloaded from: https://www.ons.gov.uk/economy/grossvalueaddedgva/datasets/nominalandrealregionalgrossvalueaddedbalancedbyindustry

In [1]:
import pandas as pd
from data_processing import process_gva_data, convert_nuts_itl, save_data

Select Table 3c: ITL3 current price estimates, pounds million.

"These tables provide estimates of gross value added (GVA) derived by balancing the income and production approaches to measuring GVA. They show economic activity for ITL1, ITL2 and ITL3 regions of the United Kingdom as current price (nominal or value) and 'real' (chained volume) measures.

These data are classified as National Statistics, according to the Code of Practice for official statistics.

Also included are implied deflators obtained by dividing the value estimates by the volume estimates. Although these are not true regional price indices, owing to the use of national prices in the deflation of GVA(B) data, they do reflect regional variation in goods and services produced, and may therefore be of use in deflating other regional data."

In [2]:
# Path to the ONS regional GVA (balanced, by industry, all ITL regions) Excel workbook
excel_file = r"/gb_pv_capacity_model/data/20241128/raw/gva/regionalgrossvalueaddedbalancedbyindustryandallitlregions.xlsx"

# Only the sheet names are needed in this notebook, so open the workbook
# without parsing every table into a DataFrame. The context manager closes
# the underlying file handle as soon as the names have been read.
with pd.ExcelFile(excel_file) as workbook:
    sheet_names = workbook.sheet_names

# Print each sheet's position next to its name; the position is the value
# later passed to process_gva_data as `sheet_index`.
print("Sheet names and indexes:")
for idx, sheet_name in enumerate(sheet_names):
    print(f"Sheet Index: {idx}, Sheet Name: {sheet_name}")


Sheet names and indexes:
Sheet Index: 0, Sheet Name: Information
Sheet Index: 1, Sheet Name: Contents
Sheet Index: 2, Sheet Name: Notes
Sheet Index: 3, Sheet Name: Table 1a
Sheet Index: 4, Sheet Name: Table 1b
Sheet Index: 5, Sheet Name: Table 1c
Sheet Index: 6, Sheet Name: Table 1d
Sheet Index: 7, Sheet Name: Table 2a
Sheet Index: 8, Sheet Name: Table 2b
Sheet Index: 9, Sheet Name: Table 2c
Sheet Index: 10, Sheet Name: Table 2d
Sheet Index: 11, Sheet Name: Table 3a
Sheet Index: 12, Sheet Name: Table 3b
Sheet Index: 13, Sheet Name: Table 3c
Sheet Index: 14, Sheet Name: Table 3d


In [3]:
# SIC07 codes to keep when the GVA table is processed.
# Grouped and range codes are strings; single two-digit codes are plain ints,
# which is how they appear in the 'SIC07 code' column of the processed table.
# Layout: one SIC group per line, in the same order as before.
selected_codes = [
    "Total",
    "A-E", "AB (1-9)",
    "C (10-33)", "CA (10-12)", "CB (13-15)", "CC (16-18)", "CD-CG (19-23)",
    "CH (24-25)", "CI-CJ (26-27)", "CK-CL (28-30)", "CM (31-33)",
    "DE (35-39)",
    "F (41-43)", 41, 42, 43,
    "G-T",
    "G (45-47)", 45, 46, 47,
    "H (49-53)", "49-51", 52, 53,
    "I (55-56)", 55, 56,
    "J (58-63)", "58-60", "61-63",
    "K (64-66)", 64, "65-66",
    "L (68)", "68IMP", 68,
    "M (69-75)", 69, 70, 71, "72-73", 74, 75,
    "N (77-82)", 77, "78-80", 81, 82,
    "O (84)",
    "P (85)",
    "Q (86-88)", 86, 87, 88,
    "R (90-93)", "90-91", "92-93",
    "S (94-96)", 94, 95, 96,
    "T (97-98)",
]


In [4]:
# Process sheet index 13 of the workbook (shown as "Table 3c" in the sheet
# listing), restricted to the SIC07 codes selected earlier.
gva = process_gva_data(
    excel_file,
    sheet_index=13,
    selected_sic07_codes=selected_codes,
    date="20241128",
)
gva

Unnamed: 0,nuts_cd,SIC07 code,SIC07 description,date,gva_pounds_million
0,UKC11,Total,All industries,1998-12-31,3317
1,UKC11,A-E,Production sector,1998-12-31,1013
2,UKC11,AB (1-9),"Agriculture, forestry and fishing; mining and ...",1998-12-31,17
3,UKC11,C (10-33),Manufacturing,1998-12-31,775
4,UKC11,CA (10-12),"Manufacture of food, beverages and tobacco",1998-12-31,102
...,...,...,...,...,...
286395,UKN0G,S (94-96),Other service activities,2022-12-31,16
286396,UKN0G,94,Activities of membership organisations,2022-12-31,2
286397,UKN0G,95,"Repair of computers, personal and household goods",2022-12-31,1
286398,UKN0G,96,Other personal service activities,2022-12-31,14


In [5]:
# Distinct SIC07 codes present in the processed GVA table
unique_sic07_codes = gva["SIC07 code"].unique()
unique_sic07_codes


array(['Total', 'A-E', 'AB (1-9)', 'C (10-33)', 'CA (10-12)',
       'CB (13-15)', 'CC (16-18)', 'CD-CG (19-23)', 'CH (24-25)',
       'CI-CJ (26-27)', 'CK-CL (28-30)', 'CM (31-33)', 'DE (35-39)',
       'F (41-43)', 41, 42, 43, 'G-T', 'G (45-47)', 45, 46, 47,
       'H (49-53)', '49-51', 52, 53, 'I (55-56)', 55, 56, 'J (58-63)',
       '58-60', '61-63', 'K (64-66)', 64, '65-66', 'L (68)', '68IMP', 68,
       'M (69-75)', 69, 70, 71, '72-73', 74, 75, 'N (77-82)', 77, '78-80',
       81, 82, 'O (84)', 'P (85)', 'Q (86-88)', 86, 87, 88, 'R (90-93)',
       '90-91', '92-93', 'S (94-96)', 94, 95, 96, 'T (97-98)'],
      dtype=object)

In [6]:
# # Split the DataFrame based on 'SIC07 code'
# dfs_by_sic07 = {sic07: group_df for sic07, group_df in gva.groupby('SIC07 code')}

# # # Example of accessing a specific DataFrame
# # for sic07_code, group in dfs_by_sic07.items():
# #     print(f"DataFrame for SIC07 code '{sic07_code}':")
# #     print(group)
# #     print("-" * 10)

In [7]:
# Save the processed GVA table to CSV, registered under the key 'gva'
save_data(
    {"gva": gva},
    date="20241128",
)