# ALLIANCE Data Transformation

Task: use pandas to transform the source CSV files into DataFrames that match the target tables of the database schema.

Tables:

- STATE_ALLIANCE
- ALLIANCE_MEMBERSHIP
- ALLIANCE_TRAITS

In [1]:
from datetime import date

import numpy as np
import pandas as pd

In [2]:
# Load the Correlates of War alliance-by-member file; every table in this
# notebook is derived from this single DataFrame.
dfAllianceAll = pd.read_csv(
    '../SourceData/CorrelatesOfWar/alliance_v4.1_by_member.csv',
    encoding='utf-8',
)
dfAllianceAll.columns

Index(['version4id', 'ccode', 'state_name', 'all_st_day', 'all_st_month',
       'all_st_year', 'all_end_day', 'all_end_month', 'all_end_year',
       'ss_type', 'mem_st_day', 'mem_st_month', 'mem_st_year', 'mem_end_day',
       'mem_end_month', 'mem_end_year', 'left_censor', 'right_censor',
       'defense', 'neutrality', 'nonaggression', 'entente', 'version'],
      dtype='object')

## Create 'STATE_ALLIANCE' table

Task: transform 'alliance_v4.1_by_member.csv' (loaded as `dfAllianceAll`) into a table with the following attributes:

- AllianceID
- AllianceType
- StartDate
- EndDate

In [3]:
# Build STATE_ALLIANCE: keep only the alliance-level columns and collapse the
# per-member rows so that each distinct alliance record appears once.
dfAlliance = (
    dfAllianceAll[['version4id', 'all_st_day', 'all_st_month', 'all_st_year',
                   'all_end_day', 'all_end_month', 'all_end_year', 'ss_type']]
    .copy()
    .rename(columns={'version4id': 'AllianceID', 'all_st_day': 'StartDay',
                     'all_st_month': 'StartMonth', 'all_st_year': 'StartYear',
                     'all_end_day': 'EndDay', 'all_end_month': 'EndMonth',
                     'all_end_year': 'EndYear', 'ss_type': 'AllianceType'})
    .drop_duplicates()
)

# StartDate is formatted directly from its integer components as 'YYYY-MM-DD'.
# Alliance start dates reach back to 1386, outside the range a pandas
# nanosecond Timestamp can hold (roughly 1677-2262), so pd.to_datetime cannot
# be relied on here. datetime.date covers years 1-9999 and raises ValueError
# on an impossible calendar date instead of silently passing it through.
start_parts = dfAlliance[['StartYear', 'StartMonth', 'StartDay']].astype(int)  # raises if a component is missing
dfAlliance['StartDate'] = [
    date(year, month, day).isoformat()
    for year, month, day in start_parts.itertuples(index=False, name=None)
]

# EndDate stays a datetime column: a missing or unparseable end date becomes NaT.
# NOTE(review): errors='coerce' would also turn an end date outside the Timestamp
# range into NaT - confirm no alliance in the source ends before 1677.
dfAlliance['EndDate'] = pd.to_datetime(
    dict(year=dfAlliance.EndYear, month=dfAlliance.EndMonth, day=dfAlliance.EndDay),
    errors='coerce',
)

dfAlliance = dfAlliance[['AllianceID', 'AllianceType', 'StartDate', 'EndDate']]
dfAlliance

Unnamed: 0,AllianceID,AllianceType,StartDate,EndDate
0,1,Type I: Defense Pact,1386-05-09,NaT
2,2,Type III: Entente,1766-02-05,1911-02-15
4,3,Type I: Defense Pact,1815-06-08,1866-06-15
24,4,Type I: Defense Pact,1815-06-12,1820-07-13
26,5,Type I: Defense Pact,1815-06-12,1848-03-29
...,...,...,...,...
1211,410,Type I: Defense Pact,2009-02-03,NaT
1213,411,Type I: Defense Pact,2010-08-20,NaT
1215,412,Type IIb: Non-Aggression Pact,2012-02-10,2012-03-27
1217,413,Type IIb: Non-Aggression Pact,1979-03-26,NaT


In [4]:
# Write the STATE_ALLIANCE table to disk without the DataFrame index.
dfAlliance.to_csv(
    '../FinalData/state_alliance.csv',
    index=False,
    encoding='utf-8',
)

## Create 'ALLIANCE_MEMBERSHIP' table

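Task: transform 'alliance_v4.1_by_member.csv' (loaded as `dfAllianceAll`) into a table with the following attributes. Note: the code below exports only AllianceID, StateID, StartDate and EndDate; the year/month/day columns are used to assemble those two dates.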

- AllianceID
- StateID
- StartDate
- EndDate
- StartYear
- StartMonth
- StartDay
- EndYear
- EndMonth
- EndDay

In [5]:
# Member-level slice of the source file. The four trait flag columns are kept
# here because the ALLIANCE_TRAITS table is derived from this same frame.
dfAllianceMem = (
    dfAllianceAll[['version4id', 'ccode', 'mem_st_day', 'mem_st_month', 'mem_st_year',
                   'mem_end_day', 'mem_end_month', 'mem_end_year',
                   'defense', 'neutrality', 'nonaggression', 'entente']]
    .copy()
    .rename(columns={'version4id': 'AllianceID', 'ccode': 'StateID',
                     'mem_st_day': 'StartDay', 'mem_st_month': 'StartMonth',
                     'mem_st_year': 'StartYear', 'mem_end_day': 'EndDay',
                     'mem_end_month': 'EndMonth', 'mem_end_year': 'EndYear'})
    .drop_duplicates()
)

# StartDate is parsed strictly (default errors='raise'). The deprecated
# errors='ignore' returned the unparsed input when any date failed, leaving a
# column that is not datetime; failing loudly surfaces bad source rows instead.
dfAllianceMem['StartDate'] = pd.to_datetime(
    dict(year=dfAllianceMem.StartYear, month=dfAllianceMem.StartMonth, day=dfAllianceMem.StartDay)
)
# A missing or unparseable end date becomes NaT.
dfAllianceMem['EndDate'] = pd.to_datetime(
    dict(year=dfAllianceMem.EndYear, month=dfAllianceMem.EndMonth, day=dfAllianceMem.EndDay),
    errors='coerce',
)

# ALLIANCE_MEMBERSHIP keeps only the key columns and the assembled dates.
dfAllianceMembership = dfAllianceMem[['AllianceID', 'StateID', 'StartDate', 'EndDate']].copy()

dfAllianceMembership

Unnamed: 0,AllianceID,StateID,StartDate,EndDate
0,1,200,1816-01-01,NaT
1,1,235,1816-01-01,NaT
2,2,200,1816-01-01,1911-02-15
3,2,380,1816-01-01,1911-02-15
4,3,240,1838-01-01,1848-03-15
...,...,...,...,...
1216,412,626,2012-02-10,2012-03-27
1217,413,651,1979-03-26,NaT
1218,413,666,1979-03-26,NaT
1219,414,2,1981-11-30,1991-12-26


In [6]:
# Write the ALLIANCE_MEMBERSHIP table to disk without the DataFrame index.
dfAllianceMembership.to_csv(
    '../FinalData/alliance_membership.csv',
    index=False,
    encoding='utf-8',
)

## Create 'ALLIANCE_TRAITS' table

Task: transform 'alliance_v4.1_by_member.csv' (loaded as `dfAllianceAll`) into a table with the following attributes:

- AllianceID
- StateID
- StartDate
- Trait

In [7]:
# Start from the membership keys plus the four trait flags. Missing values in
# 'nonaggression' and 'entente' are treated as 0 and both columns are cast to int.
dfAllianceTraits = (
    dfAllianceMem[['AllianceID', 'StateID', 'StartDate',
                   'defense', 'neutrality', 'nonaggression', 'entente']]
    .copy()
    .fillna({'nonaggression': 0, 'entente': 0})
    .astype({'nonaggression': int, 'entente': int})
)
dfAllianceTraits

Unnamed: 0,AllianceID,StateID,StartDate,defense,neutrality,nonaggression,entente
0,1,200,1816-01-01,1,0,1,0
1,1,235,1816-01-01,1,0,1,0
2,2,200,1816-01-01,0,0,0,1
3,2,380,1816-01-01,0,0,0,1
4,3,240,1838-01-01,1,0,1,1
...,...,...,...,...,...,...,...
1216,412,626,2012-02-10,0,0,1,0
1217,413,651,1979-03-26,0,0,1,0
1218,413,666,1979-03-26,0,0,1,0
1219,414,2,1981-11-30,0,0,0,1


In [8]:
dummy_cols = ['defense', 'neutrality', 'nonaggression', 'entente']

# Replace each flag with its own column name where it equals 1, else None.
trait_labels = dfAllianceTraits[dummy_cols].copy()
for trait_name in dummy_cols:
    trait_labels[trait_name] = np.where(trait_labels[trait_name] == 1, trait_name, None)

# Collect the four labels of every row into one list-valued 'Trait' column,
# explode it to one row per label, and drop the rows left with a missing value
# (the None placeholders of unset flags).
dfAllianceTraits = (
    dfAllianceTraits
    .drop(columns=dummy_cols)
    .assign(Trait=trait_labels.values.tolist())
    .explode('Trait')
    .dropna()
)

dfAllianceTraits

Unnamed: 0,AllianceID,StateID,StartDate,Trait
0,1,200,1816-01-01,defense
0,1,200,1816-01-01,nonaggression
1,1,235,1816-01-01,defense
1,1,235,1816-01-01,nonaggression
2,2,200,1816-01-01,entente
...,...,...,...,...
1216,412,626,2012-02-10,nonaggression
1217,413,651,1979-03-26,nonaggression
1218,413,666,1979-03-26,nonaggression
1219,414,2,1981-11-30,entente


In [9]:
# Write the ALLIANCE_TRAITS table to disk without the DataFrame index.
dfAllianceTraits.to_csv(
    '../FinalData/alliance_traits.csv',
    index=False,
    encoding='utf-8',
)