In [1]:
import pandas as pd
import os

# Myanmar protests

## Load data from Google Sheets

In [2]:
# Folder containing the raw input files for this notebook.
path = "data/raw/myanmar-protests/"

# Keep only the CSV files, sorted by name. os.listdir returns every directory
# entry, so stray non-CSV items (e.g. .DS_Store, .ipynb_checkpoints) must be
# filtered out before the list is handed to the CSV loader.
files = sorted(
    f"{path}{filename}"
    for filename in os.listdir(path)
    if filename.lower().endswith(".csv")
)

files

['data/raw/myanmar-protests/Myanmar 2021 - Feb 28.csv',
 'data/raw/myanmar-protests/Myanmar 2021 - March 1-2.csv',
 'data/raw/myanmar-protests/Myanmar 2021 - March 3.csv',
 'data/raw/myanmar-protests/Myanmar 2021 - March 4.csv',
 'data/raw/myanmar-protests/Myanmar 2021 - March 5.csv',
 'data/raw/myanmar-protests/Myanmar 2021 - March 7.csv',
 'data/raw/myanmar-protests/Myanmar 2021 - March 8.csv']

In [3]:
# Read every file in `files`, keep nine columns selected by position, tag each
# row with the file it came from, and keep only the verified rows.
df = (
    pd.concat(
        pd.read_csv(
            file,
            usecols=[0, 3, 9, 12, 13, 14, 16, 19, 20],
            # header=0 replaces the file's own header row with `names` below.
            header=0,
            names=[
                "content_code",
                "source_link",
                "graphic",
                "verification_status",
                "category_1",
                "category_2",
                "event_date",
                "latitude",
                "longitude",
            ],
            # event_date is left as the raw string from the file
            # (e.g. "28-Feb-21"). Do not add a positional parse_dates index
            # here: position 5 in `names` is category_2, not event_date.
        ).assign(filename=file)
        for file in files
    )
    # NOTE(review): astype("bool") turns missing values into True — confirm
    # that `graphic` is always filled in upstream.
    .astype({"graphic": "bool", "latitude": "float", "longitude": "float"})
    .query("verification_status == 'Verified'")
    .reset_index(drop=True)
)

df.info()
df.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   content_code         55 non-null     object 
 1   source_link          55 non-null     object 
 2   graphic              55 non-null     bool   
 3   verification_status  55 non-null     object 
 4   category_1           55 non-null     object 
 5   category_2           10 non-null     object 
 6   event_date           55 non-null     object 
 7   latitude             55 non-null     float64
 8   longitude            55 non-null     float64
 9   filename             55 non-null     object 
dtypes: bool(1), float64(2), object(7)
memory usage: 4.0+ KB


Unnamed: 0,content_code,source_link,graphic,verification_status,category_1,category_2,event_date,latitude,longitude,filename
0,MMR001,https://twitter.com/hannayuri_twt/status/13659...,True,Verified,Deaths,,28-Feb-21,21.96887,96.091196,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
1,MMR002,https://twitter.com/mipnnn1910/status/13660053...,True,Verified,Deaths,,28-Feb-21,21.96887,96.091196,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
2,MMR003,https://twitter.com/Myanmar_Now_Eng/status/136...,True,Verified,Deaths,,27-Feb-21,16.825675,96.126747,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
3,MMR004,https://twitter.com/matthewfsmith/status/13658...,True,Verified,Deaths,,27-Feb-21,16.825675,96.126747,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
4,MMR005,https://www.facebook.com/khitthitnews/videos/2...,False,Verified,Live Ammunition,,28-Feb-21,14.07117,98.189569,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
5,MMR006,https://twitter.com/wkkaung/status/13659912118...,False,Verified,Less-lethals,,28-Feb-21,16.828289,96.154511,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
6,MMR007,https://twitter.com/wkkaung/status/13659923915...,False,Verified,Less-lethals,,28-Feb-21,16.828364,96.155626,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
7,MMR008,https://twitter.com/ThomasVLinge/status/136602...,True,Verified,Deaths,,28-Feb-21,16.825675,96.126747,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
8,MMR009,https://www.youtube.com/watch?v=xgmUbOc048Y&fe...,True,Verified,Deaths,Live Ammunition,28-Feb-21,14.076131,98.19317,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...
9,MMR010,https://twitter.com/ThomasVLinge/status/136601...,False,Verified,Less-lethals,,28-Feb-21,12.441568,98.596877,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...


## Transform and write to CSV for geospatial processing

### Define function to concatenate categories for map tooltips

In [4]:
def tooltip(categories_list):
    """Build the tooltip text for one row from its category names.

    The names are joined with ", " and the result is passed through
    ``str.capitalize``, so only the very first character stays upper-case
    (e.g. ``["Deaths", "Live Ammunition"]`` -> ``"Deaths, live ammunition"``).
    An empty list yields an empty string.
    """
    separator = ", "
    joined = separator.join(categories_list)
    return joined.capitalize()

In [5]:
# Derive the tooltip columns. `assign` returns a new frame, so `df` itself is
# left untouched and no explicit copy is needed.
df_geo = df.assign(
    # Raw [category_1, category_2] pair per row; missing categories stay as NaN.
    categories_list_nans=lambda frame: frame[
        ["category_1", "category_2"]
    ].values.tolist(),
    # Same pair with missing entries removed and the rest sorted. pd.notna
    # recognises NaN, None and pd.NA alike, unlike comparing str(value) to "nan".
    categories_list=lambda frame: frame["categories_list_nans"].apply(
        lambda categories: sorted(c for c in categories if pd.notna(c))
    ),
    # Human-readable label built by the module-level `tooltip` function.
    tooltip=lambda frame: frame["categories_list"].apply(tooltip),
)

df_geo.info()
df_geo.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   content_code          55 non-null     object 
 1   source_link           55 non-null     object 
 2   graphic               55 non-null     bool   
 3   verification_status   55 non-null     object 
 4   category_1            55 non-null     object 
 5   category_2            10 non-null     object 
 6   event_date            55 non-null     object 
 7   latitude              55 non-null     float64
 8   longitude             55 non-null     float64
 9   filename              55 non-null     object 
 10  categories_list_nans  55 non-null     object 
 11  categories_list       55 non-null     object 
 12  tooltip               55 non-null     object 
dtypes: bool(1), float64(2), object(10)
memory usage: 5.3+ KB


Unnamed: 0,content_code,source_link,graphic,verification_status,category_1,category_2,event_date,latitude,longitude,filename,categories_list_nans,categories_list,tooltip
0,MMR001,https://twitter.com/hannayuri_twt/status/13659...,True,Verified,Deaths,,28-Feb-21,21.96887,96.091196,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Deaths, nan]",[Deaths],Deaths
1,MMR002,https://twitter.com/mipnnn1910/status/13660053...,True,Verified,Deaths,,28-Feb-21,21.96887,96.091196,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Deaths, nan]",[Deaths],Deaths
2,MMR003,https://twitter.com/Myanmar_Now_Eng/status/136...,True,Verified,Deaths,,27-Feb-21,16.825675,96.126747,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Deaths, nan]",[Deaths],Deaths
3,MMR004,https://twitter.com/matthewfsmith/status/13658...,True,Verified,Deaths,,27-Feb-21,16.825675,96.126747,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Deaths, nan]",[Deaths],Deaths
4,MMR005,https://www.facebook.com/khitthitnews/videos/2...,False,Verified,Live Ammunition,,28-Feb-21,14.07117,98.189569,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Live Ammunition, nan]",[Live Ammunition],Live ammunition
5,MMR006,https://twitter.com/wkkaung/status/13659912118...,False,Verified,Less-lethals,,28-Feb-21,16.828289,96.154511,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Less-lethals, nan]",[Less-lethals],Less-lethals
6,MMR007,https://twitter.com/wkkaung/status/13659923915...,False,Verified,Less-lethals,,28-Feb-21,16.828364,96.155626,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Less-lethals, nan]",[Less-lethals],Less-lethals
7,MMR008,https://twitter.com/ThomasVLinge/status/136602...,True,Verified,Deaths,,28-Feb-21,16.825675,96.126747,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Deaths, nan]",[Deaths],Deaths
8,MMR009,https://www.youtube.com/watch?v=xgmUbOc048Y&fe...,True,Verified,Deaths,Live Ammunition,28-Feb-21,14.076131,98.19317,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Deaths, Live Ammunition]","[Deaths, Live Ammunition]","Deaths, live ammunition"
9,MMR010,https://twitter.com/ThomasVLinge/status/136601...,False,Verified,Less-lethals,,28-Feb-21,12.441568,98.596877,data/raw/myanmar-protests/Myanmar 2021 - Feb 2...,"[Less-lethals, nan]",[Less-lethals],Less-lethals


In [6]:
# Write the map-facing subset of df_geo to disk, without the index.
# `filename` and the intermediate category columns are not exported.
df_geo.to_csv(
    "data/processed/myanmar-protests-map.csv",
    columns=[
        "content_code",
        "source_link",
        "graphic",
        "event_date",
        "latitude",
        "longitude",
        "tooltip",
    ],
    index=False,
)