Notebook to extract the permanent pasture area in each municipality from the census data of 1999

In [59]:
import pandas as pd

# Load data

In [60]:
import os

# Folder that holds the census workbooks
path_to_census_folder = "./"


def _census_file(file_name):
    """Return the path of `file_name` inside the census folder."""
    return os.path.join(path_to_census_folder, file_name)


# One workbook per region
path_to_alentejo_data = _census_file("218_RGA Alentejo.xls")
path_to_EDM_data = _census_file("213_RGA EDM.xls")
path_to_beiralitoral_data = _census_file("216_RGA Beira Litoral.xls")
path_to_tràsmontes_data = _census_file("217_RGA Trás Montes.xls")
path_to_roeste_data = _census_file("220_RGA ROeste.xls")
path_to_BI_data = _census_file("RGA-BI_1999.xls")

In [61]:
# Function to extract the permanent pastures data from each file
def extract_pastures_data(file_path, sheets_to_fetch, header_col):
    """Extract the 'Pastagens permanentes' rows from a census workbook.

    Parameters
    ----------
    file_path : str
        Path of the Excel workbook to read.
    sheets_to_fetch : list
        Names of the sheets to read; ``pd.read_excel`` returns one DataFrame
        per requested sheet.
    header_col : int
        Zero-based position of the row holding the column headers. Despite
        the parameter name it is a row position: it is forwarded to
        ``pd.read_excel`` as ``header``.

    Returns
    -------
    pandas.DataFrame
        One row per sheet column label (the 'Unnamed: 1' rows are removed),
        with the columns 'expl (nº)' and 'área (ha)' renamed to
        'expl_number' and 'pastures_area_munic'.
    """
    # Use the function's own argument: the previous body read a global named
    # `header_row`, so it only worked when the calling cell happened to define it.
    all_data = pd.read_excel(file_path, sheet_name=sheets_to_fetch, index_col=[0, 2], header=header_col)

    # Select the block of rows labelled 'Pastagens permanentes' in every sheet and
    # join them side by side in a single concat (no frame grown inside a loop).
    per_sheet = [sheet.loc['Pastagens permanentes'] for sheet in all_data.values()]
    pastures_data = pd.concat(per_sheet, axis=1, sort=False)

    # After transposing, each sheet column label becomes a row
    pastures_data = pastures_data.transpose()
    pastures_data = pastures_data.drop('Unnamed: 1')

    # For the files that have second levels for Pastagens permanentes, keep only the first two columns
    if len(pastures_data.columns) > 1:
        pastures_data = pastures_data.iloc[:, [0, 1]]
    pastures_data = pastures_data.rename(columns={'expl (nº)': 'expl_number', 'área (ha)': 'pastures_area_munic'})
    return pastures_data

### Alentejo

In [62]:
# Sheets '002' to '006' of the Alentejo workbook; the header row position is 2
sheets_to_fetch = ['{:03d}'.format(sheet_no) for sheet_no in range(2, 7)]
header_row = 2
pastures_alentejo = extract_pastures_data(
    file_path=path_to_alentejo_data,
    sheets_to_fetch=sheets_to_fetch,
    header_col=header_row,
)

In [63]:
# Regional total as reported in the 'ALENTEJO' row, kept to validate the municipal sum
total_alentejo = pastures_alentejo['pastures_area_munic'].loc['ALENTEJO']

In [64]:
# Remove the regional and sub-regional aggregate rows from the table
rows_to_drop = [
    'ALENTEJO',
    'Alentejo Litoral',
    'Alto Alentejo',
    'Alentejo Central',
    'Baixo Alentejo',
]
pastures_alentejo = pastures_alentejo.drop(index=rows_to_drop)

In [65]:
# Consistency check: the remaining rows must add up to the regional total
municipal_area_sum = pastures_alentejo['pastures_area_munic'].sum()
total_alentejo == municipal_area_sum

True

### Entre Douro e Minho

In [66]:
# Sheets '002' to '007' of the Entre Douro e Minho workbook; the header row position is 3
sheets_to_fetch = ['{:03d}'.format(sheet_no) for sheet_no in range(2, 8)]
header_row = 3
pastures_edm = extract_pastures_data(
    file_path=path_to_EDM_data,
    sheets_to_fetch=sheets_to_fetch,
    header_col=header_row,
)

In [67]:
# Inspect the extracted table (aggregate rows are still present at this point)
pastures_edm

Unnamed: 0,expl_number,pastures_area_munic
Entre Douro e Minho,13409,70736
Minho Lima,6200,43028
Arcos de Valdevez,1522,12923
Caminha,391,1642
Melgaço,805,5226
Monção,910,4234
Paredes de Coura,476,2027
Ponte da Barca,511,9121
Ponte de Lima,567,3909
Valença,66,815


In [68]:
# Regional total as reported in the 'Entre Douro e Minho' row, kept to validate the municipal sum
total_edm = pastures_edm['pastures_area_munic'].loc['Entre Douro e Minho']

In [69]:
# Remove the regional and sub-regional aggregate rows from the table
rows_to_drop = [
    'Entre Douro e Minho',
    'Minho Lima',
    'Cávado',
    'Ave',
    'Grande Porto',
    'Tâmega',
    'Entre Douro e Vouga',
]
pastures_edm = pastures_edm.drop(index=rows_to_drop)

In [70]:
# Porto has no data in the census sheets, so it is added explicitly. The scalar 0 is
# written to every column of the new row (expl_number and pastures_area_munic).
pastures_edm.loc['Porto'] = 0

In [71]:
# No data for São João da Madeira either: add it with 0 in every column
pastures_edm.loc['São João da Madeira'] = 0

In [72]:
# Consistency check: the remaining rows must add up to the regional total
municipal_area_sum = pastures_edm['pastures_area_munic'].sum()
total_edm == municipal_area_sum

True

In [73]:
# Preview the first two rows of the cleaned table
pastures_edm.head(2)

Unnamed: 0,expl_number,pastures_area_munic
Arcos de Valdevez,1522,12923
Caminha,391,1642


### Beira litoral

In [74]:
# Sheets '002' to '009' of the Beira Litoral workbook; the header row position is 3
sheets_to_fetch = ['{:03d}'.format(sheet_no) for sheet_no in range(2, 10)]
header_row = 3
pastures_beiralitoral = extract_pastures_data(
    file_path=path_to_beiralitoral_data,
    sheets_to_fetch=sheets_to_fetch,
    header_col=header_row,
)

In [75]:
# Regional total as reported in the 'BEIRA LITORAL' row, kept to validate the municipal sum
total_beiralitoral = pastures_beiralitoral['pastures_area_munic'].loc['BEIRA LITORAL']

In [76]:
# Remove the regional and sub-regional aggregate rows from the table
rows_to_drop = [
    'BEIRA LITORAL',
    'Baixo Vouga',
    'Baixo Mondego',
    'Pinhal Litoral',
    'Pinhal Interior Norte',
    'Dão-Lafões',
]
pastures_beiralitoral = pastures_beiralitoral.drop(index=rows_to_drop)

In [77]:
# Rows whose second column holds a non-digit string are set to 0 in every column.
# `.str.isdigit()` gives NaN for non-string cells, and NaN == False is False,
# so those rows are left untouched.
has_placeholder_area = pastures_beiralitoral.iloc[:, 1].str.isdigit().eq(False)
pastures_beiralitoral[has_placeholder_area] = 0

In [78]:
# Consistency check: the remaining rows must add up to the regional total
municipal_area_sum = pastures_beiralitoral['pastures_area_munic'].sum()
total_beiralitoral == municipal_area_sum

False

In [79]:
# Area (ha) of the regional total that is not accounted for by the municipal rows
municipal_area_sum = pastures_beiralitoral['pastures_area_munic'].sum()
total_beiralitoral - municipal_area_sum

5.0

Five hectares are missing: they belong to the two municipalities whose area is reported as "..." in the census. We distribute them between these two municipalities (which have 1 and 2 holdings, respectively).

In [80]:
# Distribute the 5 missing hectares between the two municipalities (2 ha and 3 ha).
# Each column is set explicitly: assigning a scalar to the whole row would also write the
# hectare value into expl_number and distort the mean pasture size computed later.
# The holdings (1 and 2) are taken from the note above this cell, assuming it lists the
# municipalities in this order — TODO confirm against the census sheet.
pastures_beiralitoral.loc['Castanheira de Pêra', 'expl_number'] = 1
pastures_beiralitoral.loc['Castanheira de Pêra', 'pastures_area_munic'] = 2
pastures_beiralitoral.loc['Pedrógão Grande', 'expl_number'] = 2
pastures_beiralitoral.loc['Pedrógão Grande', 'pastures_area_munic'] = 3

In [81]:
# Second consistency check, after distributing the missing hectares
municipal_area_sum = pastures_beiralitoral['pastures_area_munic'].sum()
total_beiralitoral == municipal_area_sum

True

### Trás-os-Montes

In [82]:
# Sheets '002' to '004' of the Trás-os-Montes workbook; the header row position is 3
sheets_to_fetch = ['{:03d}'.format(sheet_no) for sheet_no in range(2, 5)]
header_row = 3
pastures_tràsmontes = extract_pastures_data(
    file_path=path_to_tràsmontes_data,
    sheets_to_fetch=sheets_to_fetch,
    header_col=header_row,
)

In [83]:
# Regional total as reported in the 'TRÁS OS MONTES' row, kept to validate the municipal sum
total_tràsmontes = pastures_tràsmontes['pastures_area_munic'].loc['TRÁS OS MONTES']

In [84]:
# Remove the regional and sub-regional aggregate rows from the table
rows_to_drop = [
    'TRÁS OS MONTES',
    'Douro',
    'Alto Trás os Montes',
]
pastures_tràsmontes = pastures_tràsmontes.drop(index=rows_to_drop)

In [85]:
# Consistency check: the remaining rows must add up to the regional total
municipal_area_sum = pastures_tràsmontes['pastures_area_munic'].sum()
total_tràsmontes == municipal_area_sum

True

In [86]:
# Preview the first two rows of the cleaned table
pastures_tràsmontes.head(2)

Unnamed: 0,expl_number,pastures_area_munic
Alijó,721,808
Armamar,83,48


### Estremadura, Ribatejo e Oeste

In [87]:
# Sheets '002' to '006' of the ROeste workbook; the header row position is 3
sheets_to_fetch = ['{:03d}'.format(sheet_no) for sheet_no in range(2, 7)]
header_row = 3
pastures_roeste = extract_pastures_data(
    file_path=path_to_roeste_data,
    sheets_to_fetch=sheets_to_fetch,
    header_col=header_row,
)

In [88]:
# Unlike the other regions, this lookup yields several values rather than a scalar
# (presumably the 'LISBOA E VALE DO TEJO' label occurs more than once in the index —
# TODO confirm); keep the first one. `.iloc[0]` makes the positional access explicit:
# plain `[0]` on a label-indexed Series relies on a positional fallback that is
# deprecated in pandas 2.x.
total_roeste = pastures_roeste.loc['LISBOA E VALE DO TEJO', 'pastures_area_munic'].iloc[0]

In [89]:
# Remove the regional and sub-regional aggregate rows from the table
rows_to_drop = [
    'LISBOA E VALE DO TEJO',
    'Oeste',
    'Grande Lisboa',
    'Península de Setúbal',
    'Médio Tejo',
    'Lezíria do Tejo',
]
pastures_roeste = pastures_roeste.drop(index=rows_to_drop)

In [90]:
# Lisboa municipality has to be added: the scalar 0 is written to every column of the new row
pastures_roeste.loc['Lisboa'] = 0

In [91]:
# Consistency check: the remaining rows must add up to the regional total
municipal_area_sum = pastures_roeste['pastures_area_munic'].sum()
total_roeste == municipal_area_sum

True

In [92]:
# Preview the first two rows of the cleaned table
pastures_roeste.head(2)

Unnamed: 0,expl_number,pastures_area_munic
Alcobaça,384,657
Alenquer,85,550


### Beira interior

In [93]:
# Sheets '002' to '006' of the Beira Interior workbook; the header row position is 3
sheets_to_fetch = ['{:03d}'.format(sheet_no) for sheet_no in range(2, 7)]
header_row = 3
pastures_beirainterior = extract_pastures_data(
    file_path=path_to_BI_data,
    sheets_to_fetch=sheets_to_fetch,
    header_col=header_row,
)

In [94]:
# Regional total as reported in the 'BEIRA INTERIOR' row, kept to validate the municipal sum
total_beirainterior = pastures_beirainterior['pastures_area_munic'].loc['BEIRA INTERIOR']

In [95]:
# Remove the regional and sub-regional aggregate rows from the table
rows_to_drop = [
    'BEIRA INTERIOR',
    'Pinhal Interior Sul',
    'Serra da Estrela',
    'Beira Interior Norte',
    'Beira Interior Sul',
    'Cova da Beira',
]
pastures_beirainterior = pastures_beirainterior.drop(index=rows_to_drop)

In [96]:
# Consistency check: the remaining rows must add up to the regional total
municipal_area_sum = pastures_beirainterior['pastures_area_munic'].sum()
total_beirainterior == municipal_area_sum

True

In [97]:
# Preview the first two rows of the cleaned table
pastures_beirainterior.head(2)

Unnamed: 0,expl_number,pastures_area_munic
Mação,195,147
Oleiros,19,10


### Algarve

**NO DATA AVAILABLE**

# Generation of complete set

In [98]:
# Regional tables to be stacked into the national table (the order fixes the row order)
datasets = [
    pastures_alentejo,
    pastures_beirainterior,
    pastures_beiralitoral,
    pastures_edm,
    pastures_roeste,
    pastures_tràsmontes,
]

In [99]:
# Stack the regional tables into one table, rows in the order given by `datasets`.
# `DataFrame.append` was removed in pandas 2.0; a single `pd.concat` is the supported
# replacement and avoids re-copying the growing frame on every iteration.
pastures_area = pd.concat(datasets)

In [100]:
# Output table: same rows as pastures_area, without the number of holdings
pastures_data = pastures_area.drop(columns='expl_number')

In [101]:
# Rows with expl_number equal to 0: they would cause a division by zero in the mean size
has_no_holdings = pastures_area['expl_number'].eq(0)
pastures_area.loc[has_no_holdings]

Unnamed: 0,expl_number,pastures_area_munic
São João da Madeira,0,0
Porto,0,0
Lisboa,0,0


In [102]:
# To avoid division by 0: these rows have 0 holdings and 0 ha, so a denominator of 1
# gives a mean size of 0
municipalities_without_holdings = ['Lisboa', 'Porto', 'São João da Madeira']
pastures_area.loc[municipalities_without_holdings, 'expl_number'] = 1

In [103]:
# Mean pasture size per row = pasture area divided by expl_number
pastures_data['pastures_mean_size_munic'] = pastures_area['pastures_area_munic'].div(
    pastures_area['expl_number']
)

Corrections to municipality names (copied here after checking them in "Spatial granularity harmonization")

In [104]:
# Map each index label as spelled in the census tables to the spelling to use in the output
corrections = {
    'Ponte Sôr': 'Ponte de Sor',
    'Vila Velha de Rodão': 'Vila Velha de Ródão',
    'Condeixa -a- Nova': 'Condeixa-a-Nova',
    'Montemor -o- Velho': 'Montemor-o-Velho',
    'Terras do Bouro': 'Terras de Bouro',
    'Moimenta da  Beira': 'Moimenta da Beira',
}
pastures_data = pastures_data.rename(index=corrections)

# Save files

In [105]:
# Name the index so that the CSV files get a 'Municipality' header for the first column
pastures_data = pastures_data.rename_axis(index='Municipality')

In [106]:
# Keep only the pasture area column (as a Series) for the area-only export
only_pastures_area = pastures_data.loc[:, 'pastures_area_munic']

In [107]:
# Output CSV holding only the pasture area (path relative to the working directory)
out_file_only_pastures_area = "municipalities_permanent_pastures_area.csv"

In [108]:
# Write the area-only Series to disk
only_pastures_area.to_csv(path_or_buf=out_file_only_pastures_area)

In [109]:
# Output CSV holding the pasture area and the mean pasture size (path relative to the working directory)
out_file_pastures_data = "census_pastures_data.csv"

In [110]:
# Write the full output table (area and mean size) to disk
pastures_data.to_csv(path_or_buf=out_file_pastures_data)