# Analyze GiveSendGo data

In [69]:
%load_ext lab_black

In [200]:
import pandas as pd
import json
import numpy as np
import altair as alt

In [170]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

### Open most recent json file

In [384]:
with open("increments/data_increment_523649.json") as f:
    data_all = json.load(f)

In [385]:
all_data = []

for d in data_all:
    df = pd.DataFrame(d["returnData"]["donations"])
    all_data.append(df)

df = pd.concat(all_data)

In [386]:
df = df.drop_duplicates().copy()

In [387]:
df.donation_amount = df.donation_amount.astype(float)

In [388]:
df.donation_amount.sum()

4752917.5

In [389]:
df.donation_amount.mean().round(2)

94.86

In [390]:
df.donation_name.count()

50107

In [391]:
df.sort_values("donation_id").head()

Unnamed: 0,donation_id,campaign_id,donation_amount,donation_comment,donation_conversion_rate,donation_name,donation_anonymous,donation_date,lovecount,likes
0,463549,49000,100.0,We support the Truckers standing up for our freedom.,1.0,Jeff Brain,0,6 days ago,0,0
0,464203,49000,100.0,God Bless you and Keep you safe! Freedom for All!,1.0,Edwards,0,6 days ago,0,0
2,464220,49000,25.0,Thank you! God bless you all and keep you safe. Godspeed you on your mission! ❤️😀❤️,1.0,MaryEllen Stevens,0,6 days ago,0,0
3,464221,49000,10.0,Glory to God.,1.0,Our turn.,0,6 days ago,0,0
1,464223,49000,20.0,Go Truckers!,1.0,,0,6 days ago,0,0


### Dates

In [392]:
df.donation_date = (
    df.donation_date.str.replace("6 days ago", "2022-02-02", regex=False)
    .str.replace("5 days ago", "2022-02-03", regex=False)
    .str.replace("4 days ago", "2022-02-04", regex=False)
    .str.replace("3 days ago", "2022-02-05", regex=False)
    .str.replace("2 days ago", "2022-02-06", regex=False)
    .str.replace("1 days ago", "2022-02-07", regex=False)
)

In [393]:
df["donation_date"] = pd.to_datetime(df["donation_date"])

In [394]:
df["date_clean"] = df["donation_date"].dt.date

In [395]:
df.dtypes

donation_id                          int64
campaign_id                          int64
donation_amount                    float64
donation_comment                    object
donation_conversion_rate            object
donation_name                       object
donation_anonymous                   int64
donation_date               datetime64[ns]
lovecount                            int64
likes                                int64
date_clean                          object
dtype: object

In [396]:
dates = (
    df.groupby(["donation_date"])
    .agg({"donation_id": "count", "donation_amount": sum})
    .reset_index()
)

In [397]:
dates.rename(columns={"donation_id": "count", "donation_amount": "sum"}, inplace=True)

In [398]:
dates

Unnamed: 0,donation_date,count,sum
0,2022-02-02,61,3348.0
1,2022-02-03,112,6651.5
2,2022-02-04,115,5424.0
3,2022-02-05,2965,311979.0
4,2022-02-06,17561,1923301.0
5,2022-02-07,29293,2502214.0


In [379]:
alt.Chart(dates).mark_bar(width=20).encode(
    x=alt.X(
        "donation_date:T",
        axis=alt.Axis(format="%b. %d", tickCount=6),
        title="Donation date",
    ),
    y=alt.Y("sum", axis=alt.Axis(tickCount=6), title="Dollars raised"),
)

---

In [380]:
df["donation_name"] = df["donation_name"].str.strip().str.lower()

In [381]:
df.loc[df["donation_anonymous"] > 1, "donation_name"] = "anonymous"

### Group to count and sum donations by donor name

In [382]:
df.groupby(["donation_name"]).agg(
    {"donation_amount": [np.size, np.sum]}
).reset_index().sort_values(("donation_amount", "sum"), ascending=False).head(20)

Unnamed: 0_level_0,donation_name,donation_amount,donation_amount
Unnamed: 0_level_1,Unnamed: 1_level_1,size,sum
0,,24744,2137036.5
15197,processed but not recorded.,1,215000.0
1030,anonymous,279,24237.0
20255,www the range langley com,1,18000.0
632,american cryptocurrency compatriot,1,17760.0
6282,freedom,187,16071.0
15070,pierre elite trudo blackface,1,9000.0
6306,freedom convoy,56,5416.0
18643,the walsers - for freedom!,1,5000.0
1491,beekman auctions,1,5000.0


In [383]:
df.groupby(["donation_name"]).agg(
    {"donation_amount": [np.size, np.sum]}
).reset_index().sort_values(("donation_amount", "size"), ascending=False).head(20)

Unnamed: 0_level_0,donation_name,donation_amount,donation_amount
Unnamed: 0_level_1,Unnamed: 1_level_1,size,sum
0,,24744,2137036.5
1030,anonymous,279,24237.0
6282,freedom,187,16071.0
10080,justin trudeau,58,4950.0
6306,freedom convoy,56,5416.0
6399,freedom lover,53,3335.0
17612,steve,49,3293.0
7769,honk honk,42,3490.0
4285,david,36,2627.0
9415,john,33,3034.0


In [None]:
# 100 biggest donations
# 100 first donations

In [401]:
df.sort_values("donation_amount", ascending=False).head(100).to_csv(
    "data/processed/100_largest_donations.csv", index=False
)

In [405]:
df.sort_values("donation_date", ascending=True).head(100).to_csv(
    "data/processed/100_earliest_donations.csv", index=False
)