In [1]:
import altair as alt
import math
import pandas as pd
import pycountry
from vega_datasets import data

def read_cow_excel(file_location, state_name_columns, start_year_column):
    df = pd.read_excel(file_location)
    df = df.loc[df[start_year_column] > 1944]
    
    for state_name_column in state_name_columns:
        df[state_name_column] = df[state_name_column].apply(lambda x: clean_us_name(x))
        
    df = df.loc[filter_by_country_name(df, state_name_columns, "USA")]
    
    df = build_refined_df(df)
    
    return df
           
def clean_us_name(item):
    if item == "United States" or item == "United States of America":
        return "USA"
    return item

def filter_by_country_name(df, state_name_columns, country_name):
    row_filter = df.index == None
    
    for state_name_column in state_name_columns:
        row_filter = row_filter | (df[state_name_column] == country_name)
        
    return row_filter

def build_refined_df(df):
    new_df = df[["WarName", "WarType"]]
    if "StartYear1" in df.columns:
        new_df["StartYear1"] = df["StartYear1"]
        new_df["EndYear1"] = df["EndYear1"]
    else:
        new_df["StartYear1"] = df["StartYr1"]
        new_df["EndYear1"] = df["EndYr1"]
        
    
        
    return new_df

def merge_conflicts(df, names, new_name):
    first_row = df.loc[df["WarName"] == names[0]]
    second_row = df.loc[df["WarName"] == names[1]]
    print(first_row["StartYear1"])
    
    
    

include_folder = "C:/Users/mille/PycharmProjects/VisualizingPower/venv/Include/"
inter_file_location = include_folder + "Inter-StateWarData_v4.0.xlsx"
extra_file_location = include_folder + "Extra-StateWarData_v4.0.xlsx"
intra_file_location = include_folder + "INTRA-STATE_State_participants v5.1.xlsx"

inter_state_name_columns = ["StateName"]
extra_state_name_columns = ["SideA", "SideB"]
intra_state_name_columns = ["SideA", "SideB"]

inter_df = read_cow_excel(inter_file_location, inter_state_name_columns, "StartYear1")
extra_df = read_cow_excel(extra_file_location, extra_state_name_columns, "StartYear1")
intra_df = read_cow_excel(intra_file_location, intra_state_name_columns, "StartYr1")

df = pd.concat([inter_df, extra_df, intra_df])

merge_conflicts(df, ["Vietnam War, Phase 2", "Vietnam War phase 1 of 1960-1965"], "Vietnam War")

alt.Chart(df).mark_bar().encode(
    alt.X('StartYear1:Q', scale=alt.Scale(
            domain=(1950, 2020),
            clamp=True
        )),
    x2='EndYear1:Q',
    y='WarName:O'
).properties(
    width=1000,
    height=600,
    title='Timeline of US Military Interventions'
)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["StartYear1"] = df["StartYear1"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["EndYear1"] = df["EndYear1"]


234    1965
Name: StartYear1, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["StartYear1"] = df["StartYr1"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["EndYear1"] = df["EndYr1"]
