In [1]:
import os, pandas as pd
from dotenv import load_dotenv
from utility import Utils as U
import sqlalchemy as db

### Data source is currently a CSV
### Later on this will be improved to be a webscraper or a data dump to an S3 bucket

In [2]:
UPR_cycle = pd.read_excel(r'UPR_cycle2.xlsx')
tags_matrix = pd.read_excel(r'themes_to_tags.xlsx')

In [3]:
UPR_cycle.index+=1
UPR_cycle.rename(columns={"Reccomending Body": "Recommending Body", "UPR Reccomending States": "UPR Recommending States"}, inplace=True)

# Data Wrangling

##### - Check unique number of text & annotation id to establish primary key
##### - Both have 625 unique entries which is the same as the total database entries
##### - Normalise by removing one of these, two identifiers is uneccesary we are not gaining anymore information

In [4]:
text_count = UPR_cycle.Text.unique().size
print(text_count)

625


In [5]:
annotation_id_count = UPR_cycle["OHCHR Annotation Id"].unique().size
print(annotation_id_count)

625


##### Columns to be dropped
##### Annotation ID, Recommending body, Text, Affected persons, Sdgs, Document publication date, session, date
##### check all entries are recommendation by seeing if text starts with number, then drop this column. 

In [6]:
drop_columns = ["OHCHR Annotation Id","Recommending Body", "Document Publication Date", "Sdgs", "UPR Session", "Date of publication on UHRI", "Affected Persons"]
UPR_cycle.drop(drop_columns, inplace=True, axis=1)

In [7]:
UPR_cycle = UPR_cycle[UPR_cycle['Text'].apply(lambda x: U.has_numbers(x))]
UPR_cycle.drop(['Type'], inplace=True, axis=1)
UPR_cycle.index =UPR_cycle.index.set_names(['Recommendation_id'])

#### Standardize the naming convention, by removing hyphens and spaces, of all data entries.

In [8]:
UPR_cycle_cleaned = U.add_empty_cell_string(UPR_cycle)
UPR_cycle_cleaned['Themes'] = UPR_cycle_cleaned['Themes'].str.split('\n')
UPR_cycle_unnested = UPR_cycle_cleaned.explode('Themes')

In [9]:
UPR_cycle_unnested = UPR_cycle_unnested.applymap(lambda x: U.remove_leading_char(str(x)))
UPR_cycle_unnested['Recommending Regions'] = UPR_cycle_unnested['Recommending Regions'].str.split('\n')
UPR_cycle_unnested['UPR Recommending States'] = UPR_cycle_unnested['UPR Recommending States'].str.split('\n')
UPR_cycle_unnested = UPR_cycle_unnested.explode(['UPR Recommending States', 'Recommending Regions'])
UPR_cycle_unnested = UPR_cycle_unnested.applymap(lambda x: U.remove_leading_char(str(x)))

#### Define all staging schema tables 
* Recommendations
* Countries
* Regions
* Themes
* Tags

In [10]:
Recommendations_df = UPR_cycle[['Text', 'UPR Position']].applymap(lambda x: U.remove_leading_char(str(x)))
Recommendations_df= Recommendations_df.reset_index(level=0)

In [11]:
c1 = UPR_cycle_unnested[['Countries Concerned','Regions Concerned']].rename(columns={"Countries Concerned":"Country name", "Regions Concerned": "Regions"})
c2 = UPR_cycle_unnested[['UPR Recommending States', 'Recommending Regions']].rename(columns={"UPR Recommending States":"Country name", "Recommending Regions": "Regions"})
Countries_df = pd.concat([c1,c2], ignore_index=True).drop_duplicates(subset=['Country name'], keep='first')
Countries_df = Countries_df.reset_index(drop='True')
Countries_df.index = Countries_df.index.set_names(['Country id'])
Countries_df.index+=1
Countries_df = Countries_df.reset_index()

In [12]:
Regions_df = pd.DataFrame(Countries_df['Regions'].unique(), columns=['Regions'])
Regions_df.index = Regions_df.index.set_names(['Region id'])
Regions_df.index+=1
Regions_df = Regions_df.reset_index(level=0)

In [13]:
Themes_df = pd.DataFrame(UPR_cycle_unnested['Themes'].unique(), columns=['Themes'])
Themes_df.index = Themes_df.index.set_names(['Theme id'])
Themes_df.index+=1
Themes_df = Themes_df.reset_index(level=0)

In [14]:
tags_df = pd.DataFrame(tags_matrix.columns,columns=['Tags'] )
tags_df.index = tags_df.index.set_names(['Tag id'])
tags_df.index +=1
tags_df = tags_df.reset_index()

### Standardized Dataframes

In [15]:
Recommendations_df

Unnamed: 0,Recommendation_id,Text,UPR Position
0,1,Promptly ratify the Optional Protocol to the C...,Noted
1,2,Ensure full and equal access to modern contrac...,Supported
2,3,Protect the rights of indigenous peoples throu...,Supported
3,4,Continue enhancing the school infrastructure f...,Supported
4,5,Continue implementing mechanisms for the prior...,Supported
...,...,...,...
620,621,Adopt legislation to guarantee the fulfilment ...,Supported
621,622,Establish clear consultation procedures in ord...,Noted
622,623,Consider ratifying ILO Convention N° 169 ( Nor...,Supported
623,624,Extend an invitation to the Working Group on e...,Supported


In [16]:
Countries_df

Unnamed: 0,Country id,Country name,Regions
0,1,Uganda,Africa
1,2,Venezuela (Bolivarian Republic of),Latin America and the Caribbean
2,3,Hungary,Eastern Europe
3,4,Ireland,Western Europe & Others
4,5,Thailand,Asia-Pacific
...,...,...,...
144,145,Togo,Africa
145,146,Montenegro,Eastern Europe
146,147,Chad,Africa
147,148,Oman,Asia-Pacific


In [17]:
Regions_df

Unnamed: 0,Region id,Regions
0,1,Africa
1,2,Latin America and the Caribbean
2,3,Eastern Europe
3,4,Western Europe & Others
4,5,Asia-Pacific
5,6,Missing data


In [18]:
Themes_df

Unnamed: 0,Theme id,Themes
0,1,Labour rights and right to work
1,2,Ratification of & accession to international i...
2,3,Prohibition of torture & ill-treatment (includ...
3,4,Sexual & reproductive health and rights
4,5,Right to participate in public affairs & right...
...,...,...
63,64,Good governance & corruption
64,65,Private life & privacy
65,66,Right to be recognized as a person before the law
66,67,Trade union rights


In [19]:
tags_df

Unnamed: 0,Tag id,Tags
0,1,Scope of international obligations and coopera...
1,2,National human rights framework
2,3,Development
3,4,Environment
4,5,Business and human rights
5,6,Human rights and counter-terrorism
6,7,Right to land
7,8,Civil & political rights - general measures of...
8,9,Equality and non-discrimination
9,10,"Right to life, liberty and security of person"


In [20]:
tags_matrix

Unnamed: 0,Scope of international obligations and cooperation with international human rights mechanisms and bodies,National human rights framework,Development,Environment,Business and human rights,Human rights and counter-terrorism,Right to land,Civil & political rights - general measures of implementation,Equality and non-discrimination,"Right to life, liberty and security of person",...,Right to work and to just and favourable conditions of work,Right to social security,Right to health and to traditional health practices,Right to education and control over educational systems,"Right to develop their culture, religion and language and non-assimilation",Women,Children,Persons with disabilities,"Migrants, refugees, asylum seekers and internally displaced persons",Stateless persons
0,Scope of international obligations,Legal and institutional reform,Right to development,Human rights & the environment,Business & Human Rights,Human rights & counter-terrorism,Land & property rights,Civil & political rights - general measures of...,Equality & non-discrimination,Right to physical & moral integrity,...,Labour rights and right to work,Right to social security,Right to health,Right to education,Cultural rights,Advancement of women,Children: definition; general principles; prot...,"Persons with disabilities: definition, general...",,
1,Cooperation with human rights mechanisms & req...,"Human rights education, trainings & awareness-...",Economic policy and foreign debt,Human rights & toxics/hazardous wastes,,,,,Racial discrimination,Liberty & security of the person,...,Trade union rights,,Sexual & reproductive health and rights,,,Discrimination against women,Children: family environment & alternative care,"Persons with disabilities: accessibility, mobi...",,
2,Inter-State cooperation and assistance,Constitutional & legislative framework,2030 Agenda & other voluntary commitments,Human rights & climate change,,,,,Persons deprived of liberty: definition of tor...,Right to life,...,,,,,,Violence against women,Children: protection against exploitation,Persons with disabilities: protection against ...,,
3,Ratification of & accession to international i...,National Human Rights Action Plans (or specifi...,Unilateral coercive measures,"Economic & political crisis, natural disasters...",,,,,Persons deprived of liberty: concept of places...,"Extrajudicial, summary or arbitrary executions",...,,,,,,Participation of women in political & public life,Children: juvenile justice,Persons with disabilities: protecting the inte...,,
4,Reservations,National Mechanisms for Reporting & Follow-up ...,,,,,,,,Death penalty,...,,,,,,Sexual & gender-based violence,,"Persons with disabilities: independence, inclu...",,
5,Derogation & state of emergency,National Human Rights Institution (NHRI)_x000D...,,,,,,,,Prohibition of torture & ill-treatment (includ...,...,,,,,,,,Persons with disabilities: protection and safe...,,
6,Cooperation & follow up with Treaty Bodies,National Preventive Mechanism (NPM),,,,,,,,Conditions of detention,...,,,,,,,,,,
7,Cooperation & follow up with Special Procedures,Cooperation & consultation with civil society,,,,,,,,Enforced disappearances,...,,,,,,,,,,
8,Cooperation & follow up with the Universal Per...,Data collection & research,,,,,,,,Arbitrary arrest & detention,...,,,,,,,,,,
9,Cooperation with international organizations,Budget & resources (for human rights implement...,,,,,,,,Use of mercenaries/private security,...,,,,,,,,,,


#### Upload tables to staging schema

add in secrets to obscure db connection path

In [21]:
load_dotenv()
conn = U.create_db_connection(os.getenv('username'), os.getenv('password'),os.getenv('db_host'),os.getenv('db_name') )

In [None]:
# this is a bit repetitive  is there a better solution?

In [25]:
U.upload_staging_data(Countries_df, conn, 'Countries', 'Staging')
U.upload_staging_data(Themes_df, conn, 'Themes', 'Staging')
U.upload_staging_data(Regions_df, conn, 'Regions', 'Staging')
U.upload_staging_data(Recommendations_df, conn, 'Recommendations', 'Staging')
U.upload_staging_data(tags_df, conn, 'Tags', 'Staging')
U.upload_staging_data(tags_matrix, conn, 'TagsMatrix', 'Staging')

12