**Important**: to run the notebook, launch jupyter notebook from the anaconda prompt after having activated the environment

**Environment for the project**: aggregated_SBP_adoption_dataset

**Requirements**: numpy pandas xlrd geopandas openpyxl

Notebook to check the geographical granularity of the different datasets and merge them

## Content
* Data ingestion
* Comparison
* Corrections:
    * Correction of PCF counties to correspond to the ones in the shapefile
    * Disaggregation of pre-PCF adoption:
        * Match of regions with municipalities
        * Disaggregation of adopted area based on pasture area in each municipality
      
**NOTE:** in the last section on disaggregartion, possible to choose between the two different possibilities based on the disaggregation of pre-PCF adoption.

In [1]:
import numpy as np
import pandas as pd

# Data ingestion

## SBP adoption previous to the PCF project

In [2]:
path_to_adoption_pre_PCF = "./Terraprima - PCF/Pastures before 2009.xlsx"

In [3]:
adoption_pre_PCF = pd.read_excel(path_to_adoption_pre_PCF, header=2, index_col=1)
adoption_pre_PCF = adoption_pre_PCF.drop('Unnamed: 0', axis=1)
adoption_pre_PCF.head()

Unnamed: 0_level_0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Minho,0,1.0,2,0.0,30.0,29,12,27.5,46,46,79,90,455
Aveiro,0,6.0,5,0.0,3.0,0,10,30.0,37,35,36,138,50
Coimbra,5,2.5,0,1.5,4.5,9,2,26.5,12,7,7,18,15
Trás-os-Montes,2,5.0,32,35.0,30.0,41,76,89.0,110,69,84,120,97
Guarda,18,73.0,135,128.0,35.0,16,92,100.0,142,108,119,134,223


In [4]:
regions_prePCF = adoption_pre_PCF.index.tolist()
len(regions_prePCF)

24

## PCF project

In [5]:
path_to_PCF_data = "./Terraprima - PCF/20160729_RelCampo_Pastagens_Chave_RT.xlsx"

In [6]:
cols_to_fetch = ['Parcel_ID', 'Farmer_ID', 'Year that the pasture was installed', 'County', 'Area_Total_SIG_PPI_2009_ha',
                 'Area_Total_SIG_PPI_2010_ha', 'Area_Total_SIG_PPII_2011_ha', 'Area_Total_SIG_PPII_2012_ha']
PCF_data = pd.read_excel(path_to_PCF_data, sheet_name='Data_table', usecols=cols_to_fetch, header=1, index_col=0)
PCF_data = PCF_data.fillna(0)
PCF_data.head()

Unnamed: 0_level_0,Farmer_ID,Year that the pasture was installed,County,Area_Total_SIG_PPI_2009_ha,Area_Total_SIG_PPI_2010_ha,Area_Total_SIG_PPII_2011_ha,Area_Total_SIG_PPII_2012_ha
Parcel_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
55,1,2009,Idanha-a-Nova,9.28,0.0,0.0,0.0
410,2,2010,Évora,0.0,3.21,0.0,0.0
681,2,2011,Évora,0.0,0.0,6.83,0.0
1068,3,2012,Reguengos de Monsaraz,0.0,0.0,0.0,38.17
584,4,2010,Avis,0.0,9.57,0.0,0.0


In [7]:
counties_PCF = PCF_data['County'].unique().tolist()
len(counties_PCF)

86

## Census data 1999

**NOTE:** data for Algarve are missing

In [8]:
import os

path_to_census_folder = "./Census 1999"

path_to_alentejo_data = os.path.join(path_to_census_folder, "218_RGA Alentejo.xls")
path_to_EDM_data = os.path.join(path_to_census_folder, "213_RGA EDM.xls")
path_to_beiralitoral_data = os.path.join(path_to_census_folder, "216_RGA Beira Litoral.xls")
path_to_tràsmontes_data = os.path.join(path_to_census_folder, "217_RGA Trás Montes.xls")
path_to_roeste_data = os.path.join(path_to_census_folder, "220_RGA ROeste.xls")
path_to_BI_data = os.path.join(path_to_census_folder, "RGA-BI_1999.xls")

In [9]:
alentejo = pd.read_excel(path_to_alentejo_data, sheet_name="001", usecols="B")
alentejo = alentejo.dropna()
alentejo = alentejo.values
alentejo[:5]

array([['Alcácer do Sal'],
       ['Grândola'],
       ['Odemira'],
       ['Santiago do Cacém'],
       ['Sines']], dtype=object)

In [10]:
edm = pd.read_excel(path_to_EDM_data, sheet_name="001", usecols="C")
edm = edm.dropna()
edm = edm.values
edm = np.delete(edm, 0)
edm[:5]

array(['Arcos de Valdevez', 'Caminha', 'Melgaço', 'Monção',
       'Paredes de Coura'], dtype=object)

In [11]:
beira_litoral = pd.read_excel(path_to_beiralitoral_data, sheet_name="001", usecols="C")
beira_litoral = beira_litoral.dropna()
beira_litoral = beira_litoral.values
beira_litoral[:5]

array([['Águeda'],
       ['Albergaria-a-Velha'],
       ['Anadia'],
       ['Aveiro'],
       ['Estarreja']], dtype=object)

In [12]:
tràs_montes = pd.read_excel(path_to_tràsmontes_data, sheet_name="001", usecols="C")
tràs_montes = tràs_montes.dropna()
tràs_montes = tràs_montes.values
tràs_montes = np.delete(tràs_montes, 0)
tràs_montes[:5]

array(['Alijó', 'Armamar', 'Carrazeda de Ansiães',
       'Freixo de Espada à Cinta', 'Lamego'], dtype=object)

In [13]:
roeste = pd.read_excel(path_to_roeste_data, sheet_name="001", usecols="C")
roeste = roeste.dropna()
roeste = roeste.values
roeste = np.delete(roeste, 0)
roeste[:5]

array(['Alcobaça', 'Alenquer', 'Arruda dos Vinhos', 'Bombarral',
       'Cadaval'], dtype=object)

In [14]:
beira_interior = pd.read_excel(path_to_BI_data, sheet_name="001", usecols="C")
beira_interior = beira_interior.dropna()
beira_interior = beira_interior.values
beira_interior[:5]

array([['Mação'],
       ['Oleiros'],
       ['Proença-a-Nova'],
       ['Sertã'],
       ['Vila de Rei']], dtype=object)

In [15]:
regions_munic = [edm, tràs_montes, beira_litoral, beira_interior, roeste, alentejo]

In [16]:
municipalities_census = [munic for region in regions_munic for munic in region]
municipalities_census[:5]

['Arcos de Valdevez', 'Caminha', 'Melgaço', 'Monção', 'Paredes de Coura']

In [17]:
len(municipalities_census)

260

## Shapefile

In [18]:
import geopandas as gpd

In [19]:
path_to_shapefile = "./counties_shp/mod_concelhos.shp"

In [20]:
shapefile_data = gpd.read_file(path_to_shapefile)

In [21]:
shapefile_data.head()

Unnamed: 0,CCA_2,District,Municipali,geometry
0,705,Évora,Évora,"POLYGON ((-7.79291 38.76507, -7.79287 38.76506..."
1,701,Évora,Alandroal,"POLYGON ((-7.25937 38.77351, -7.25921 38.77343..."
2,702,Évora,Arraiolos,"POLYGON ((-7.88611 38.92495, -7.88580 38.92472..."
3,703,Évora,Borba,"POLYGON ((-7.46362 38.92344, -7.46344 38.92329..."
4,704,Évora,Estremoz,"POLYGON ((-7.52770 39.00080, -7.52765 39.00066..."


In [22]:
shapefile_data.columns=['CCA_2', 'District', 'Municipality', 'geometry']

In [23]:
municipalities_shapefile = shapefile_data['Municipality'].tolist()
len(municipalities_shapefile)

308

In [24]:
districts_shapefile = shapefile_data['District'].value_counts().keys().tolist()
len(districts_shapefile)

20

In [25]:
shapefile_data.set_index('Municipality', inplace=True)

In [26]:
shapefile_data.head()

Unnamed: 0_level_0,CCA_2,District,geometry
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Évora,705,Évora,"POLYGON ((-7.79291 38.76507, -7.79287 38.76506..."
Alandroal,701,Évora,"POLYGON ((-7.25937 38.77351, -7.25921 38.77343..."
Arraiolos,702,Évora,"POLYGON ((-7.88611 38.92495, -7.88580 38.92472..."
Borba,703,Évora,"POLYGON ((-7.46362 38.92344, -7.46344 38.92329..."
Estremoz,704,Évora,"POLYGON ((-7.52770 39.00080, -7.52765 39.00066..."


# Comparison

## Aggregated adoption pre-PCF and during PCF

In [27]:
common_regions = [reg for reg in regions_prePCF if reg in counties_PCF]
common_regions

['Guarda',
 'Castelo Branco',
 'Abrantes',
 'Santarém',
 'Coruche',
 'Évora',
 'Elvas',
 'Portalegre',
 'Estremoz',
 'Ponte de Sor',
 'Odemira',
 'Beja']

Still, not sure if terrotorially these regions coincide (they could be bigger in the pre-PCF dataset including neighbouring municipalitie)

## Census - PCF adoption

In [28]:
munic_in_PCF_not_in_census = [munic for munic in counties_PCF if munic not in municipalities_census]
munic_in_PCF_not_in_census

['Vila Velha de Rodão',
 'Alcácer do Sal - Torrão/Alvito-V.N. Baronia',
 'Ourique ',
 'Santiago do cacém',
 'Alcácer do Sal - Santa Susana',
 'Évora / Montemor-o-Novo',
 'Moura e Serpa',
 'Albernoa',
 'Benavente/Porto Alto',
 'Elvas e Campo Maior',
 'Alcácer do Sal - Torrão',
 'Lisboa - Serpa',
 'Ferreira do Alentejo /Figueira dos Cavaleiros',
 'Ponte de Sor / Montargil']

In [29]:
len(munic_in_PCF_not_in_census)

14

There are some conflicts of names to be solved, but out of 86 municipalities in the PCF data 72 are also in the census data, so the geographical division coincides in general terms

## Shapefile - Census

In [30]:
munic_in_census_not_in_shapefile = [munic for munic in municipalities_census if munic not in municipalities_shapefile]
munic_in_census_not_in_shapefile

[]

In [31]:
munic_in_shapefile_not_in_census = [munic for munic in municipalities_shapefile if munic not in municipalities_census]
len(munic_in_shapefile_not_in_census)

48

No municipality in the census is missing in the shapefile (there was Meda but changed the shapefile)

A lot from the shapefile are missing from the census, with the majority most likely belonging to Algarve (Faro district), Azores and Madeira for which we have no data. Let's check if more are missing.

### Check excluding Madeira, Azores and Faro districts from shapefile


In [32]:
len(shapefile_data)

308

In [33]:
shapefile_data_excl = shapefile_data.loc[(shapefile_data['District'] != 'Azores') 
                                         & (shapefile_data['District'] != 'Madeira')
                                         & (shapefile_data['District'] != 'Faro')]

In [479]:
len(shapefile_data_excl)

262

In [480]:
municipalities_shapefile_excl = shapefile_data_excl.index.to_list()

In [481]:
munic_in_shapefile_excl_not_in_census = [munic for munic in municipalities_shapefile_excl if munic not in municipalities_census]
len(munic_in_shapefile_excl_not_in_census)

2

In [482]:
munic_in_shapefile_excl_not_in_census

['Lisboa', 'Porto']

Apart from the ones for which we have no data, the census does not report data for the municipalities of Lisboa and Porto.
**Can it be because there is no agricultural land there?**

## Shapefile - pre-PCF adoption

In [483]:
regions_as_districts = [reg for reg in regions_prePCF if reg in districts_shapefile]
len(regions_as_districts)

10

In [484]:
regions_as_municipalities = [reg for reg in regions_prePCF if reg in municipalities_shapefile]
len(regions_as_municipalities)

17

In [485]:
regions_in_shapefile = regions_as_districts + regions_as_municipalities
regions_as_nothing = [region for region in regions_prePCF if region not in regions_in_shapefile]
len(regions_as_nothing)

7

This means that in the prePCF dataset we have:
* 7 regions that have the same name of 6 municipalities
* 6 regions that could be both districts or municipalities
* 11 that do not appear in the shapefile at all

### Shapefile - PCF data

In [486]:
munic_in_PCF_not_in_shapefile = [munic for munic in counties_PCF if munic not in municipalities_shapefile]
munic_in_PCF_not_in_shapefile

['Vila Velha de Rodão',
 'Alcácer do Sal - Torrão/Alvito-V.N. Baronia',
 'Ourique ',
 'Santiago do cacém',
 'Alcácer do Sal - Santa Susana',
 'Évora / Montemor-o-Novo',
 'Moura e Serpa',
 'Albernoa',
 'Benavente/Porto Alto',
 'Elvas e Campo Maior',
 'Alcácer do Sal - Torrão',
 'Lisboa - Serpa',
 'Ferreira do Alentejo /Figueira dos Cavaleiros',
 'Ponte de Sor / Montargil']

In [487]:
munic_in_PCF_not_in_shapefile == munic_in_PCF_not_in_census

True

The only difference is that 'Ponte de Sor' is in shapefile but not in census

## Conclusions

Make sense to use the municipality level agregation. The only dataset that has a lower granularity is the adoption previous to the PCF, that will have to be disaggregated, probably based on the pasture area in each municipality (available from the census). For the rest, some adjustements are required.

# Correction of PCF counties to correspond to the ones in the shapefile

In [488]:
munic_in_PCF_not_in_shapefile = [munic for munic in counties_PCF if munic not in municipalities_shapefile]
munic_in_PCF_not_in_shapefile

['Vila Velha de Rodão',
 'Alcácer do Sal - Torrão/Alvito-V.N. Baronia',
 'Ourique ',
 'Santiago do cacém',
 'Alcácer do Sal - Santa Susana',
 'Évora / Montemor-o-Novo',
 'Moura e Serpa',
 'Albernoa',
 'Benavente/Porto Alto',
 'Elvas e Campo Maior',
 'Alcácer do Sal - Torrão',
 'Lisboa - Serpa',
 'Ferreira do Alentejo /Figueira dos Cavaleiros',
 'Ponte de Sor / Montargil']

In [489]:
replacements = {}

#### Albernoa

It's a civil parish in the municipality of Beja

In [490]:
[munic for munic in counties_PCF if 'Beja' in munic]

['Beja']

In [491]:
[munic for munic in municipalities_shapefile if 'Beja' in munic]

['Beja']

Since both databases present Beja, the values reported in Albernoa were conidered in Beja

In [492]:
replacements['Albernoa'] = 'Beja'

####  Alcácer do Sal - Santa Susana, Alcácer do Sal - Torrão, Alcácer do Sal - Torrão/Alvito-V.N. Baronia

In [493]:
'Alcácer do Sal' in municipalities_shapefile

True

In [494]:
[munic in municipalities_shapefile for munic in ['Santa Susana', 'Torrão', 'Alvito']]

[False, False, True]

In [495]:
'Alvito' in counties_PCF

True

Since the shapefile contains the municipality of Alcácer do Sal and not the other more specific areas, we can group the three entries in the PCF data to Alcácer do Sal. (Alvito is in the municipalities but there is another single entry for it in the PCF).


In [496]:
replacements['Alcácer do Sal - Santa Susana'] = 'Alcácer do Sal'
replacements['Alcácer do Sal - Torrão'] = 'Alcácer do Sal'
replacements['Alcácer do Sal - Torrão/Alvito-V.N. Baronia'] = 'Alcácer do Sal'

#### Benavente/Porto Alto

In [497]:
[mun in counties_PCF for mun in ['Benavente', 'Porto Alto']]

[True, False]

In [498]:
[mun in municipalities_shapefile for mun in ['Benavente', 'Porto Alto']]

[True, False]

While the shapefile has only an entry for Benavente, the PCF project dataset presents two, Benavente and Benavente/Porto Alto, both referring to Bonavente municipality

In [499]:
replacements['Benavente/Porto Alto'] = 'Benavente'

#### Elvas e Campo Maior

In [500]:
[mun in counties_PCF for mun in ['Elvas', 'Campo Maior']]

[True, True]

In [501]:
[mun in municipalities_shapefile for mun in ['Elvas', 'Campo Maior']]

[True, True]

Elvas and Campo Maior are two distinct municipalities with two separate entries in the PCF project database. Checking the spreadsheet, it is possible to see that the parcels with County value Elvas e Campo Maior are four, all corresponding to the same farmer. Since no more information is available to decide to which municipality assign this area, it is arbitrarily assigned to the municipality with the biggest surface, that is Elvas.

In [502]:
replacements['Elvas e Campo Maior'] = 'Elvas'

#### Ferreira do Alentejo /Figueira dos Cavaleiros

In [503]:
[mun in counties_PCF for mun in ['Ferreira do Alentejo', 'Figueira dos Cavaleiros']]

[True, False]

In [504]:
[mun in municipalities_shapefile for mun in ['Ferreira do Alentejo', 'Figueira dos Cavaleiros']]

[True, False]

As for Benavente/Porto Alto

In [505]:
replacements['Ferreira do Alentejo /Figueira dos Cavaleiros'] = 'Ferreira do Alentejo'

#### Lisboa - Serpa

In [506]:
[mun in counties_PCF for mun in ['Lisboa', 'Serpa']]

[False, True]

Serpa is a different municipality not close to Lisbon and it has its own entry in the PCF project database. However, the municipality of Lisboa includes mainly urban area and therefore an adoption in this municipality is unlikely.

In [507]:
replacements['Lisboa - Serpa'] = 'Serpa'

#### Moura e Serpa

In [508]:
[mun in counties_PCF for mun in ['Moura', 'Serpa']]

[True, True]

In [509]:
[mun in municipalities_shapefile for mun in ['Moura', 'Serpa']]

[True, True]

As for Elvas e Campo Maior, both exists

In [510]:
replacements['Moura e Serpa'] = 'Serpa'

#### Ourique

In [511]:
[mun in counties_PCF for mun in ['Ourique ', 'Ourique']]

[True, True]

This is a typing error, there is an entry for Ourique that has a blank space after the name

In [512]:
replacements['Ourique '] = 'Ourique'

#### Ponte de Sor, Ponte de Sor / Montargil

In [513]:
[mun in counties_PCF for mun in ['Ponte de Sor', 'Montargil']]

[True, False]

In [514]:
[mun in municipalities_shapefile for mun in ['Ponte de Sor', 'Montargil']]

[True, False]

In [515]:
[mun in municipalities_census for mun in ['Ponte de Sor', 'Montargil']]

[True, False]

The shapefile reported the Ponte de Sor municipality as Ponte de Sôr, but the shapefile was changed. Since Montargil is not a municipality, Ponte de Sor / Montargil is considered belonging to Ponte de Sor.

In [516]:
replacements['Ponte de Sor / Montargil'] = 'Ponte de Sor'

#### Santiago do cacém

In [517]:
'Santiago do Cacém' in municipalities_shapefile

True

In [518]:
'Santiago do Cacém' in counties_PCF

True

It appears not capitalized sometimes in the database

In [519]:
replacements['Santiago do cacém'] = 'Santiago do Cacém'

#### Vila Velha de Rodão

In [520]:
'Vila Velha de Ródão' in municipalities_shapefile

True

In [521]:
'Vila Velha de Ródão' in counties_PCF

False

In the PCF project, it misses the accent on the o of Ródão

In [522]:
replacements['Vila Velha de Rodão'] = 'Vila Velha de Ródão'

#### Évora / Montemor-o-Novo

In [523]:
[mun in counties_PCF for mun in ['Évora', 'Montemor-o-Novo']]

[True, True]

In [524]:
[mun in municipalities_shapefile for mun in ['Évora', 'Montemor-o-Novo']]

[True, True]

As for Elvas e Campo Maior, both exists

In [525]:
replacements['Évora / Montemor-o-Novo'] = 'Évora'

### Create the PCF project database with the corrected counties and save it

In [526]:
new_PCF_data = PCF_data.replace(replacements)

In [527]:
new_PCF_data.to_excel("./Terraprima - PCF/From spatial granularity harmonization/PCF project data_Corrected counties.xlsx")

### Check everything replaced properly

In [528]:
corrected_counties_PCF = new_PCF_data['County'].unique().tolist()
len(corrected_counties_PCF)

73

In [529]:
to_still_fix_PCF = [munic for munic in corrected_counties_PCF if munic not in municipalities_shapefile]
to_still_fix_PCF

[]

## Are there municipalities that adopted during the PCF project in Azores, Madeira or Algarve?

In [530]:
munic_in_azores = shapefile_data.loc[(shapefile_data['District'] == 'Azores')]
munic_in_madeira = shapefile_data.loc[(shapefile_data['District'] == 'Madeira')]

for munic in corrected_counties_PCF:
    if munic in munic_in_azores:
        print(munic, 'in Azores')
    if munic in munic_in_madeira:
        print(munic, 'in Madeira')

There was no adoption during the PCF programme in Azores and Madeira

In [531]:
munic_in_algarve = shapefile_data.loc[(shapefile_data['District'] == 'Faro')]

for munic in corrected_counties_PCF:
    if munic in munic_in_algarve:
        print(munic, 'in Algarve')

Also no adoption in Algarve

# Disaggregation of pre-PCF adoption

## Matching of pre-PCF adoption regions with municipalities

Result of this part is the **mapping** dictionary, which maps each regions in the pre-PCF dataset with the corresponding municipalities

In [532]:
regions_as_districts

['Aveiro',
 'Coimbra',
 'Guarda',
 'Viseu',
 'Castelo Branco',
 'Leiria',
 'Santarém',
 'Évora',
 'Portalegre',
 'Beja']

In [533]:
regions_as_municipalities

['Aveiro',
 'Coimbra',
 'Guarda',
 'Viseu',
 'Castelo Branco',
 'Leiria',
 'Abrantes',
 'Santarém',
 'Tomar',
 'Coruche',
 'Évora',
 'Elvas',
 'Portalegre',
 'Estremoz',
 'Ponte de Sor',
 'Odemira',
 'Beja']

In [534]:
regions_as_nothing

['Minho',
 'Trás-os-Montes',
 'Oeste',
 'Montemor',
 'Ferreira',
 'Algarve',
 'Madeira + Azores']

Regions: (https://en.wikipedia.org/wiki/Districts_of_Portugal) 
* Algarve --> Faro (16 munic)
* Trás-os-Montes --> Bragança + Vila Real + partly Viseu and Guarda, 31 munic. (https://en.wikipedia.org/wiki/Tr%C3%A1s-os-Montes_e_Alto_Douro_Province)
* Oeste --> 12 municip. (https://en.wikipedia.org/wiki/Oeste_(intermunicipal_community)), in the districts of Lisboa and Leiria
* Madeira + Azores --> Madeira + Azores 

Agric. regions:
* Minho --> Braga + Viana do Castelo (14 + 10 munic)

Could be districts or municipalities:
* Aveiro
* Coimbra
* Guarda
* Viseu
* Leiria
* Portalegre
* Beja
* Castelo Branco
* Évora
* Santarém

Corresponding only to municipalities:
* Odemira (in Beja)
* Estremoz (in Évora)
* Elvas (in Portalegre)
* Ponte de Sor (in Portalegre)
* Coruche (in Santarém)
* Tomar (in Santarém)
* Abrantes (in Santarém)

Not clear ones:
* Montemor: can be assumed to be Montemor-o-Novo, in the Évora district and with a much bigger area than Montemor-o-Velho
* Ferreira: can be assumed to be Ferreira do Alentejo, also present in the PCF project and in Beja district, bigger than Ferreira do Zêzere that is in Santarém district

In [535]:
mapping = {}
mapping_2 = {}

In [536]:
mappings = [mapping, mapping_2]

Versions:
* Assign too the biggest number of municipalities possible included in the name, proportionally to their pastures' land
* Assign only to municipalities that adopted during PCF, proportionally to their pastures land. 
    * If none, assign to all the rest proportionally to pasture land
    * (NOT IMPLEMENTED If none, assign all to the biggest)
* (NOT IMPLEMENTED Assign only to municipalities that adopted during PCF, proportionally to their total adoption during PCF project)

### Unclear ones

In [537]:
mapping['Montemor'] = ['Montemor-o-Novo']

In [538]:
mapping['Ferreira'] = ['Ferreira do Alentejo']

### Single municipalities

In [539]:
single_municipalities = ['Odemira', 'Estremoz', 'Elvas', 'Ponte de Sor', 'Coruche', 'Tomar', 'Abrantes']

In [540]:
for munic in single_municipalities:
    mapping[munic] = [munic]

In [541]:
mapping_2 = mapping.copy()

### Districts

In [542]:
districts = ['Aveiro', 'Coimbra', 'Guarda', 'Viseu', 'Portalegre', 'Beja', 'Castelo Branco', 'Évora', 'Santarém', 'Leiria']

In [543]:
munic_already_included = single_municipalities + ['Montemor-o-Novo', 'Ferreira do Alentejo']
for dist in districts:
    munic_in_distr = shapefile_data.loc[shapefile_data['District'] == dist].index.to_list()
    munic_to_include = [munic for munic in munic_in_distr if munic not in munic_already_included]
    mapping[dist] = munic_to_include

#### Version 2

In [544]:
munic_already_included = single_municipalities + ['Montemor-o-Novo', 'Ferreira do Alentejo']
for dist in districts:
    munic_in_distr = shapefile_data.loc[shapefile_data['District'] == dist].index.to_list()
    munic_to_include = [munic for munic in munic_in_distr if (munic not in munic_already_included) and (munic in corrected_counties_PCF)]
    mapping_2[dist] = munic_to_include
    if len(mapping_2[dist]) == 0:
        munic_to_include = [munic for munic in munic_in_distr if (munic not in munic_already_included)]
        mapping_2[dist] = munic_to_include

### Regions

In [545]:
def include_in_mapping_2(region):
    munic_to_include_2 = [munic for munic in munic_in_region if 
                          (munic not in municipalities_already_in_mapping_2) and (munic in corrected_counties_PCF)]
    mapping_2[region] = munic_to_include_2
    print(len(mapping_2[region]))
    if len(mapping_2[region]) == 0:
        mapping_2[region] = munic_to_include
        print(len(mapping_2[region]))

In [546]:
# To avoid to insert duplicates with the regions
municipalities_already_in_mapping = [munic for mapped in mapping.values() for munic in mapped]
municipalities_already_in_mapping_2 = [munic for mapped in mapping_2.values() for munic in mapped]

In [547]:
shapefile_data.loc[shapefile_data['District'] == 'Leiria'].index.unique()

Index(['Óbidos', 'Alcobaça', 'Alvaiázere', 'Ansião', 'Batalha', 'Bombarral',
       'Caldas da Rainha', 'Castanheira de Pêra', 'Figueiró dos Vinhos',
       'Leiria', 'Marinha Grande', 'Nazaré', 'Pedrógão Grande', 'Peniche',
       'Pombal', 'Porto de Mós'],
      dtype='object', name='Municipality')

In [548]:
munic_in_oeste = ['Alcobaça', 'Alenquer', 'Arruda dos Vinhos', 'Bombarral', 'Cadaval', 'Caldas da Rainha', 'Lourinhã', 
                    'Nazaré', 'Óbidos', 'Peniche', 'Sobral de Monte Agraço', 'Torres Vedras']
munic_in_ribatejo = ['Azambuja', 'Vila Franca de Xira', 'Alcochete', 'Montijo', 'Moita']
munic_in_region = munic_in_oeste + munic_in_ribatejo

munic_to_include = [munic for munic in munic_in_region if munic not in municipalities_already_in_mapping]
mapping['Oeste'] = munic_to_include

In [549]:
include_in_mapping_2('Oeste')

6


In [550]:
munic_leir = shapefile_data.loc[shapefile_data['District'] == 'Leiria'].index.to_list()
munic_oeste_in_leir=[munic for munic in munic_in_region if munic in munic_leir]
munic_oeste_in_leir

['Alcobaça', 'Bombarral', 'Caldas da Rainha', 'Nazaré', 'Óbidos', 'Peniche']

In [551]:
faro_municipalities = shapefile_data.loc[shapefile_data['District'] == 'Faro'].index.to_list()

munic_in_region = faro_municipalities

munic_to_include = [munic for munic in munic_in_region if munic not in municipalities_already_in_mapping]
mapping['Algarve'] = munic_to_include

In [552]:
include_in_mapping_2('Algarve')

0
16


In [553]:
braganca_municipalities = shapefile_data.loc[shapefile_data['District'] == 'Bragança'].index.to_list()
vilareal_municipalities = shapefile_data.loc[shapefile_data['District'] == 'Vila Real'].index.to_list()
viseu_municipalities_in_tom = ['Armamar', 'Lamego', 'Tabuaço', 'São João da Pesqueira']
guarda_municipalities_in_tom = ['Vila Nova de Foz Côa']

munic_in_region = (braganca_municipalities + vilareal_municipalities 
                    + viseu_municipalities_in_tom + guarda_municipalities_in_tom)

munic_to_include = [munic for munic in munic_in_region if munic not in municipalities_already_in_mapping]
mapping['Trás-os-Montes'] = munic_to_include

In [554]:
include_in_mapping_2('Trás-os-Montes')

2


In [555]:
madeira_municipalities = shapefile_data.loc[shapefile_data['District'] == 'Madeira'].index.to_list()
azores_municipalities = shapefile_data.loc[shapefile_data['District'] == 'Azores'].index.to_list()

munic_in_region = madeira_municipalities + azores_municipalities
munic_to_include = [munic for munic in munic_in_region if munic not in municipalities_already_in_mapping]
mapping['Madeira + Azores'] = munic_in_region

In [556]:
include_in_mapping_2('Madeira + Azores')

0
30


In [557]:
braga_municipalities = shapefile_data.loc[shapefile_data['District'] == 'Braga'].index.to_list()
vianadocastelo_municipalities = shapefile_data.loc[shapefile_data['District'] == 'Viana do Castelo'].index.to_list()

munic_in_region = braga_municipalities + vianadocastelo_municipalities
munic_to_include = [munic for munic in munic_in_region if munic not in municipalities_already_in_mapping]
mapping['Minho'] = munic_in_region

In [558]:
include_in_mapping_2('Minho')

0
24


### Checks

If the keys of the mapping coincide with the original regions

In [559]:
mapping_keys = list(mapping.keys())
set(mapping_keys) == set(regions_prePCF)

True

In [560]:
mapping_2_keys = list(mapping_2.keys())
set(mapping_2_keys) == set(regions_prePCF)

True

Check on municipalities: duplicates inserted with the regions

In [561]:
municipalities_in_mapping = [munic for mapped in mapping.values() for munic in mapped]

In [562]:
len(municipalities_in_mapping)

272

In [563]:
len(set(municipalities_in_mapping))

270

In [564]:
import collections

In [565]:
counter = collections.Counter(municipalities_in_mapping)
duplicates = {k: v for k, v in counter.items() if v > 1}
duplicates

{'Lagoa': 2, 'Calheta': 2}

Lagoa and Calheta are fine, since there are actually two for each. Therefore no duplicates are present

#### Check mapping_2 municipalities are the same as in PCF database + some added

In [596]:
mapping_2_munic = [munic for mapped in mapping_2.values() for munic in mapped]
len(mapping_2_munic)

216

In [597]:
len(counties_PCF)

86

## Disaggregation of adopted area based on pasture area in each municipality

**NOTE:** need to exclude from this analysis, apart from Azores and Madeira, also Algarve since no data in the census on that

In [566]:
shapefile_in_madeira_azores = shapefile_data.loc[(shapefile_data['District'] == 'Azores') 
                                                      | (shapefile_data['District'] == 'Madeira')]
municipalities_in_madeira_azores = shapefile_in_madeira_azores.index.tolist()

In [567]:
shapefile_in_algarve = shapefile_data.loc[shapefile_data['District'] == 'Faro']
municipalities_in_algarve = shapefile_in_algarve.index.tolist()

In [568]:
municipalities_to_remove = municipalities_in_madeira_azores + municipalities_in_algarve

In [569]:
mapping_restr = {}
for key, munic_list in mapping.items():
    mapping_restr[key] = [munic for munic in munic_list if munic not in municipalities_to_remove]

In [570]:
mapping_restr_2 = {}
for key, munic_list in mapping_2.items():
    mapping_restr_2[key] = [munic for munic in munic_list if munic not in municipalities_to_remove]

In [571]:
municipalities_remained_in_mapping_1 = [munic for mapped in mapping_restr.values() for munic in mapped]
len(municipalities_remained_in_mapping_1)

226

In [572]:
municipalities_remained_in_mapping_2 = [munic for mapped in mapping_restr_2.values() for munic in mapped]
len(municipalities_remained_in_mapping_2)

170

In [573]:
regions_to_remove = [region for region in mapping_restr.keys() if mapping_restr[region] == []]

In [574]:
regions_to_remove_2 = [region for region in mapping_restr_2.keys() if mapping_restr_2[region] == []]

In [575]:
for region in regions_to_remove:
    mapping_restr.pop(region)

In [576]:
for region in regions_to_remove_2:
    mapping_restr_2.pop(region)

### Load census data

In [577]:
path_to_municipalities_pasture_area = "./Census 1999/municipalities_permanent_pastures_area.csv"

In [578]:
municipalities_pastures_area = pd.read_csv(path_to_municipalities_pasture_area, index_col=0)

Check that the municipalities are the same as the other census data already loaded (+ Porto and Lisboa)

**NOTE:** after checking here, the correction has been applied to the dataset before saving it the relative notebook

### Disaggregation

Version 1

In [579]:
mapping = mapping_restr
out_path_adoption_pre_PCF = (
    "./Terraprima - PCF/From spatial granularity harmonization/SBP adoption previous to 2009 per municipality.xlsx"
    )

Version 2

Get total pastures area for the municipalities in each region

In [580]:
calc_area = pd.DataFrame(index=mapping.keys())
calc_area['Municipalities'] = mapping.values()
calc_area.index.name = 'Region'

  return array(a, dtype, copy=False, order=order)


In [581]:
for region in calc_area.index.tolist():
    calc_area.loc[region, 'Total pasture area'] = sum([municipalities_pastures_area.loc[munic, 'pastures_area_munic'] 
                                                 for munic in calc_area.loc[region, 'Municipalities']])

Append a column with the region corresponding to the municipality to "municipalities_pastures_area"

In [582]:
municipalities_pastures_area['Region'] = ''
for munic in municipalities_pastures_area.index.tolist():
    mask = calc_area['Municipalities'].apply(lambda x: munic in x)
    try:
        region = calc_area[mask].index.values[0]
    except IndexError:
        region = float('NaN')
    municipalities_pastures_area['Region'].loc[munic] = region

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [583]:
len(municipalities_pastures_area[municipalities_pastures_area['Region'].notnull()])

226

Makes sense, of the 262 municipalities considered (exluding Madeira, Azores and Algarve), 221 are mapped to a region in the PCF and will have a non null adoption (in fact the municipalities mapped to a region were 221). The remaining will not have any adoption.

Calculate coefficients of area percentage for each municipality

In [584]:
municipalities_pastures_area['Area coefficient'] = ''
for munic in municipalities_pastures_area.index.tolist():
    if pd.isnull(municipalities_pastures_area.loc[munic, 'Region']):
        coef = 0
    else:
        coef = (municipalities_pastures_area.loc[munic, 'pastures_area_munic'] 
                / calc_area.loc[municipalities_pastures_area.loc[munic, 'Region'], 'Total pasture area'])
    municipalities_pastures_area['Area coefficient'].loc[munic] = coef

In [585]:
# Check that sum of indices = 1 for all regions
(abs(municipalities_pastures_area[['Region', 'Area coefficient']].groupby('Region').sum() - 1) < 0.0001).all()

Area coefficient    True
dtype: bool

Disaggregate the adoption per year with the coefficients

In [586]:
#indexes = [munic for munic in municipalities_pastures_area.index if munic not in ['Lisboa', 'Porto', 'São João da Madeira']]
indexes = municipalities_pastures_area.index

In [587]:
adoption_pre_PCF_munic = pd.DataFrame(index=indexes, columns=adoption_pre_PCF.columns)

In [588]:
for col in adoption_pre_PCF.columns.tolist():
    for munic in adoption_pre_PCF_munic.index.tolist():
        if municipalities_pastures_area.loc[munic, 'Area coefficient'] == 0:
            adoption_pre_PCF_munic.loc[munic, col] = 0
        else:       
            adoption_pre_PCF_munic.loc[munic, col] = (adoption_pre_PCF.loc[municipalities_pastures_area.loc[munic, 'Region'], col] 
                                                      * municipalities_pastures_area.loc[munic, 'Area coefficient'])

Check if total area remained the same

In [589]:
(adoption_pre_PCF.sum()-adoption_pre_PCF.loc[['Algarve', 'Madeira + Azores']].sum()).sum()

83057.5

In [590]:
adoption_pre_PCF_munic.sum().sum()

83057.49999999999

## Save disaggregated dataset

In [591]:
adoption_pre_PCF_munic.index.name = 'Municipality'

In [592]:
adoption_pre_PCF_munic.to_excel(out_path_adoption_pre_PCF)

In [593]:
adoption_pre_PCF_munic

Unnamed: 0_level_0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alcácer do Sal,0,0,0,0,0,0,0,0,0,0,0,0,0
Grândola,0,0,0,0,0,0,0,0,0,0,0,0,0
Odemira,34,110,272,237,554,726,846,1672,877,455,462,679,879
Santiago do Cacém,0,0,0,0,0,0,0,0,0,0,0,0,0
Sines,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Murça,0.00981006,0.0245252,0.156961,0.171676,0.147151,0.201106,0.372782,0.436548,0.539554,0.338447,0.412023,0.588604,0.475788
Valpaços,0.0623654,0.155913,0.997846,1.09139,0.935481,1.27849,2.36988,2.77526,3.4301,2.15161,2.61935,3.74192,3.02472
Vila Pouca de Aguiar,0.1131,0.282749,1.80959,1.97924,1.6965,2.31854,4.29779,5.03294,6.22048,3.90194,4.75019,6.78598,5.48533
Vimioso,0.0563932,0.140983,0.902291,0.986881,0.845898,1.15606,2.14294,2.5095,3.10163,1.94556,2.36851,3.38359,2.73507


# Export of shapefile for ABM

In [35]:
munic_to_excl_more = ['Lisboa', 'Porto', 'São João da Madeira']

In [37]:
shapefile_data_excl.drop(munic_to_excl_more, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [39]:
out_path = "./counties_shp/Shapefile for ABM/shapefile_for_munic_abm.shp"

In [40]:
shapefile_data_excl.to_file(out_path)