In [None]:
from google.colab import files
uploaded = files.upload()

Saving war_events.csv to war_events.csv


In [None]:
import pandas as pd

# Upload datasets (you can also mount Google Drive if needed)
war_df = pd.read_csv("/content/war_events.csv")
refugee_df = pd.read_csv("/content/refugees.csv")
economy_df = pd.read_csv("/content/economy.csv")

print("War Events Data Sample:\n", war_df.head())
print("\nRefugee Data Sample:\n", refugee_df.head())
print("\nEconomic Data Sample:\n", economy_df.head())


War Events Data Sample:
   event_id        date  location     event_type  casualties    source
0    E1000  2022-02-26    Dnipro       Shelling          74  Source C
1    E1001  2022-02-27      Kyiv      Airstrike          87  Source D
2    E1002  2022-02-28    Dnipro      Airstrike         116  Source D
3    E1003  2022-02-26  Mariupol  Ground Attack          99  Source C
4    E1004  2022-03-05   Kharkiv  Ground Attack         103  Source D

Refugee Data Sample:
   refugee_id        date origin_country destination_country  refugee_count
0      R2000  2022-02-25           Kyiv             Germany           7049
1      R2001  2022-02-26           Lviv             Germany           2533
2      R2002  2022-02-24        Donetsk             Germany           5411
3      R2003  2022-03-01         Odessa              Poland           5151
4      R2004  2022-03-04        Kharkiv            Slovakia           6520

Economic Data Sample:
    country        date  gdp_change  inflation_rate  unempl

In [None]:
# Merge on date and location/origin_country
merged_df = pd.merge(war_df, refugee_df,
                     left_on=["date", "location"],
                     right_on=["date", "origin_country"],
                     how="left")

print("\nAfter Merging War and Refugee Data:\n", merged_df.head())

# Handling missing refugee movement data
# Fill NaNs in refugee_count with 0
merged_df["refugee_count"] = merged_df["refugee_count"].fillna(0)
print("\nFilled missing refugee_count with 0.")



After Merging War and Refugee Data:
   event_id        date  location     event_type  casualties    source  \
0    E1000  2022-02-26    Dnipro       Shelling          74  Source C   
1    E1001  2022-02-27      Kyiv      Airstrike          87  Source D   
2    E1002  2022-02-28    Dnipro      Airstrike         116  Source D   
3    E1003  2022-02-26  Mariupol  Ground Attack          99  Source C   
4    E1004  2022-03-05   Kharkiv  Ground Attack         103  Source D   

  refugee_id origin_country destination_country  refugee_count  
0        NaN            NaN                 NaN            NaN  
1        NaN            NaN                 NaN            NaN  
2        NaN            NaN                 NaN            NaN  
3        NaN            NaN                 NaN            NaN  
4        NaN            NaN                 NaN            NaN  

Filled missing refugee_count with 0.


In [None]:
# First, map event locations to countries
location_to_country = {
    "Kyiv": "Ukraine", "Kiev": "Ukraine", "Kharkiv": "Ukraine",
    "Donetsk": "Ukraine", "Lviv": "Ukraine", "Odessa": "Ukraine",
    "Zaporizhzhia": "Ukraine", "Moscow": "Russia"
}
merged_df["country"] = merged_df["location"].map(location_to_country)

# Merge economic data
final_df = pd.merge(merged_df, economy_df, on=["date", "country"], how="left")

# Fill missing GDP and inflation values using forward fill
final_df["gdp_change"] = final_df["gdp_change"].fillna(method="ffill")
final_df["inflation_rate"] = final_df["inflation_rate"].fillna(method="ffill")

print("\nAfter Joining with Economic Data:\n", final_df.head())



After Joining with Economic Data:
   event_id        date  location     event_type  casualties    source  \
0    E1000  2022-02-26    Dnipro       Shelling          74  Source C   
1    E1001  2022-02-27      Kyiv      Airstrike          87  Source D   
2    E1002  2022-02-28    Dnipro      Airstrike         116  Source D   
3    E1003  2022-02-26  Mariupol  Ground Attack          99  Source C   
4    E1004  2022-03-05   Kharkiv  Ground Attack         103  Source D   

  refugee_id origin_country destination_country  refugee_count  country  \
0        NaN            NaN                 NaN            0.0      NaN   
1        NaN            NaN                 NaN            0.0  Ukraine   
2        NaN            NaN                 NaN            0.0      NaN   
3        NaN            NaN                 NaN            0.0      NaN   
4        NaN            NaN                 NaN            0.0  Ukraine   

   gdp_change  inflation_rate  unemployment_rate  
0         NaN          

  final_df["gdp_change"] = final_df["gdp_change"].fillna(method="ffill")
  final_df["inflation_rate"] = final_df["inflation_rate"].fillna(method="ffill")


In [None]:
# Standardize location names
final_df["location"] = final_df["location"].replace("Kiev", "Kyiv")
print("\nReplaced 'Kiev' with 'Kyiv' in location names.")



Replaced 'Kiev' with 'Kyiv' in location names.


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Normalize casualties and refugee_count
scaler = MinMaxScaler()
final_df[["casualties_norm", "refugee_norm"]] = scaler.fit_transform(
    final_df[["casualties", "refugee_count"]]
)

# Assign weights to event types
event_weights = {
    "Battle": 1.0,
    "Shelling": 0.8,
    "Airstrike": 1.2,
    "Occupation": 0.5
}
final_df["event_weight"] = final_df["event_type"].map(event_weights)

# Create severity index
final_df["war_severity_index"] = (
    final_df["casualties_norm"] + final_df["refugee_norm"]
) * final_df["event_weight"]

print("\nAdded war_severity_index:\n", final_df[["event_type", "casualties", "refugee_count", "war_severity_index"]].head())



Added war_severity_index:
       event_type  casualties  refugee_count  war_severity_index
0       Shelling          74            0.0            0.389333
1      Airstrike          87            0.0            0.688000
2      Airstrike         116            0.0            0.920000
3  Ground Attack          99            0.0                 NaN
4  Ground Attack         103            0.0                 NaN


In [None]:
# Check for inconsistencies
print("Missing values:\n", final_df.isnull().sum())
print("\nDuplicates:\n", final_df.duplicated().sum())
print("\nData Types:\n", final_df.dtypes)

# Export to CSV
final_df.to_csv('final_war_analysis.csv', index=False)

# Download the final file
from google.colab import files
files.download('final_war_analysis.csv')


Missing values:
 event_id               0
date                   0
location               0
event_type             0
casualties             0
source                 0
refugee_id             9
destination_country    9
refugee_count          0
country                4
gdp_change             1
inflation_rate         1
unemployment_rate      4
casualties_norm        0
refugee_norm           0
event_weight           4
war_severity_index     4
dtype: int64

Duplicates:
 0

Data Types:
 event_id                object
date                    object
location                object
event_type              object
casualties               int64
source                  object
refugee_id              object
destination_country     object
refugee_count          float64
country                 object
gdp_change             float64
inflation_rate         float64
unemployment_rate      float64
casualties_norm        float64
refugee_norm           float64
event_weight           float64
war_severity_index 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>