In [1]:
import pandas as pd
import numpy as np

import datetime

import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
# 데이터를 불러옵니다.
donation_original = pd.read_csv('./data/donations_dollar.csv')
general_original = pd.read_csv('./data/general_final.csv')
organizer_original = pd.read_csv('./data/organizer_dollar.csv')
updates_original = pd.read_csv('./data/updates_new.csv')
comments_original = pd.read_csv('./data/comments_dollar.csv')

In [3]:
general_original.head()

Unnamed: 0,chid,url,scrape_date,title,created_date,funds_raised,funds_target,picture,org_picture,counts_status_code,...,recent_time_window,funds_raised_dollar,funds_target_dollar,amount_raised_unattributed_dollar,char_age,success,dum_success,group_category,total_comments,update_counts
0,1,https://www.gofundme.com/f/-chris-beaty-memori...,2020-11-03,Chris Beaty Memorial Fund,2020-06-03,175083.0,200000.0,True,False,200,...,48 hours,175083,200000.0,0.0,153,False,0,Funerals & Memorials,74.0,1.0
1,2,https://www.gofundme.com/f/1-prosent-chance,2020-11-03,1 percent chance,2020-06-21,299042.0,2000000.0,True,False,200,...,48 hours,33792,226000.0,0.0,135,False,0,"Medical, Illness & Healing",0.0,0.0
2,3,https://www.gofundme.com/f/100-computers-in-10...,2020-11-04,MELS needs #100ComputersIn100Days,2020-06-19,13845.0,30000.0,True,True,200,...,48 hours,13845,30000.0,8196.0,138,False,0,Others,7.0,4.0
3,4,https://www.gofundme.com/f/100-to-victims-of-t...,2020-11-03,100% to Victims of Taal Volcano in Philippines,2020-01-16,25.0,5000.0,True,True,200,...,48 hours,25,5000.0,0.0,292,False,0,Accidents & Emergencies,0.0,0.0
4,5,https://www.gofundme.com/f/100k-for-center-for...,2020-11-04,$100k for Center for Family Life,2020-11-02,13950.0,100000.0,True,False,200,...,48 hours,13950,100000.0,1100.0,2,False,0,Others,1.0,0.0


# 작업을 위한 테이블 편집

In [69]:
#원하는 컬럼만 추출
general = general_original[['chid','title','created_date','funds_raised_dollar', 'funds_target_dollar', 'amount_raised_unattributed_dollar', 'picture', 'org_picture', 'campaign_hearts', 'num_recent_donations', 'total_donations', 'success']]
general.columns = ['chid','title','created_date','funds_raised_d', 'funds_target_d', 'amount_anonymous', 'picture', 'org_picture', 'campaign_hearts', 'num_recent_donations', 'total_donations', 'success']
general.head()

Unnamed: 0,chid,title,created_date,funds_raised_d,funds_target_d,amount_anonymous,picture,org_picture,campaign_hearts,num_recent_donations,total_donations,success
0,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False
1,2,1 percent chance,2020-06-21,33792,226000.0,0.0,True,False,542.0,0.0,602.0,False
2,3,MELS needs #100ComputersIn100Days,2020-06-19,13845,30000.0,8196.0,True,True,119.0,0.0,125.0,False
3,4,100% to Victims of Taal Volcano in Philippines,2020-01-16,25,5000.0,0.0,True,True,1.0,0.0,1.0,False
4,5,$100k for Center for Family Life,2020-11-02,13950,100000.0,1100.0,True,False,17.0,16.0,17.0,False


In [70]:
# 원하는 컬럼만 추출
donation = donation_original[['chid','is_anonymous','created_at', 'amount_dollar']]

In [71]:
donation.columns = ['chid','is_anonymous','donated_at', 'amount_d']

In [72]:
# donated_at 타입변경 및 시간스탬프 삭제
pd.options.mode.chained_assignment = None
donation.loc[:,'donated_at'] = pd.to_datetime(donation.loc[:,'donated_at']).dt.date

In [73]:
donation.dtypes

chid              int64
is_anonymous       bool
donated_at       object
amount_d        float64
dtype: object

In [74]:
# general과 donation 테이블 inner join
df_1 = pd.merge(general, donation, how = 'inner', on = 'chid')
df_1.dtypes

chid                      int64
title                    object
created_date             object
funds_raised_d            int64
funds_target_d          float64
amount_anonymous        float64
picture                    bool
org_picture                bool
campaign_hearts         float64
num_recent_donations    float64
total_donations         float64
success                    bool
is_anonymous               bool
donated_at               object
amount_d                float64
dtype: object

In [75]:
df_1[["created_date", "donated_at"]] = df_1[["created_date", "donated_at"]].apply(pd.to_datetime)
df_1.dtypes

chid                             int64
title                           object
created_date            datetime64[ns]
funds_raised_d                   int64
funds_target_d                 float64
amount_anonymous               float64
picture                           bool
org_picture                       bool
campaign_hearts                float64
num_recent_donations           float64
total_donations                float64
success                           bool
is_anonymous                      bool
donated_at              datetime64[ns]
amount_d                       float64
dtype: object

In [76]:
# 펀딩 성공여부를 알 수 있는 불리언 타입의 'is_successful' 컬럼 추가
df = df_1.assign(is_successful = df_1['funds_raised_d'] >= df_1['funds_target_d'],
                date_diff = df_1['donated_at'] - df_1['created_date'])
df.head()

Unnamed: 0,chid,title,created_date,funds_raised_d,funds_target_d,amount_anonymous,picture,org_picture,campaign_hearts,num_recent_donations,total_donations,success,is_anonymous,donated_at,amount_d,is_successful,date_diff
0,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-10-14,480.0,False,133 days
1,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-15,10.0,False,104 days
2,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-10,50.0,False,99 days
3,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-09,100.0,False,98 days
4,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-08,100.0,False,97 days


In [77]:
# timedelta인 date_diff를 숫자형(float)으로 변환 (그래프 x축에 두기 위해)
df['date_diff'] = df['date_diff'].apply(lambda x:x.days)

In [78]:
df.dtypes

chid                             int64
title                           object
created_date            datetime64[ns]
funds_raised_d                   int64
funds_target_d                 float64
amount_anonymous               float64
picture                           bool
org_picture                       bool
campaign_hearts                float64
num_recent_donations           float64
total_donations                float64
success                           bool
is_anonymous                      bool
donated_at              datetime64[ns]
amount_d                       float64
is_successful                     bool
date_diff                        int64
dtype: object

In [79]:
# 랜덤으로 chid 693번 펀딩의 정보 확인
df.loc[df['chid'] == 693,:].head()

Unnamed: 0,chid,title,created_date,funds_raised_d,funds_target_d,amount_anonymous,picture,org_picture,campaign_hearts,num_recent_donations,total_donations,success,is_anonymous,donated_at,amount_d,is_successful,date_diff
403212,693,Black Leadership Initiative,2020-06-26,15847,15000.0,9970.0,True,True,186.0,0.0,209.0,True,False,2020-07-21,100.0,True,25
403213,693,Black Leadership Initiative,2020-06-26,15847,15000.0,9970.0,True,True,186.0,0.0,209.0,True,True,2020-07-13,25.0,True,17
403214,693,Black Leadership Initiative,2020-06-26,15847,15000.0,9970.0,True,True,186.0,0.0,209.0,True,False,2020-07-11,20.0,True,15
403215,693,Black Leadership Initiative,2020-06-26,15847,15000.0,9970.0,True,True,186.0,0.0,209.0,True,False,2020-07-10,500.0,True,14
403216,693,Black Leadership Initiative,2020-06-26,15847,15000.0,9970.0,True,True,186.0,0.0,209.0,True,False,2020-07-09,25.0,True,13


In [80]:
df.head()

Unnamed: 0,chid,title,created_date,funds_raised_d,funds_target_d,amount_anonymous,picture,org_picture,campaign_hearts,num_recent_donations,total_donations,success,is_anonymous,donated_at,amount_d,is_successful,date_diff
0,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-10-14,480.0,False,133
1,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-15,10.0,False,104
2,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-10,50.0,False,99
3,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-09,100.0,False,98
4,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,True,False,1691.0,0.0,1705.0,False,False,2020-09-08,100.0,False,97


In [81]:
# 랜덤으로 chid 329번 펀딩의 정보 확인
df.loc[df['chid'] == 329,:].head()

Unnamed: 0,chid,title,created_date,funds_raised_d,funds_target_d,amount_anonymous,picture,org_picture,campaign_hearts,num_recent_donations,total_donations,success,is_anonymous,donated_at,amount_d,is_successful,date_diff
190621,329,Karen Nascembeni,2020-04-11,55435,50000.0,0.0,True,False,634.0,0.0,638.0,True,True,2020-10-29,10.0,True,201
190622,329,Karen Nascembeni,2020-04-11,55435,50000.0,0.0,True,False,634.0,0.0,638.0,True,False,2020-10-29,50.0,True,201
190623,329,Karen Nascembeni,2020-04-11,55435,50000.0,0.0,True,False,634.0,0.0,638.0,True,False,2020-10-29,50.0,True,201
190624,329,Karen Nascembeni,2020-04-11,55435,50000.0,0.0,True,False,634.0,0.0,638.0,True,False,2020-10-29,100.0,True,201
190625,329,Karen Nascembeni,2020-04-11,55435,50000.0,0.0,True,False,634.0,0.0,638.0,True,False,2020-07-25,100.0,True,105


# 일자별 기부횟수, 기부액 총합 살펴보기
## 기부횟수
### 일자별 기부 발생 수

In [17]:
# 날짜별 donation chid 카운트하여 일별 기부 수 구함
df_day_numbers = df.groupby('created_date')[["chid"]].count().reset_index()
df_day_numbers.head()

Unnamed: 0,created_date,chid
0,2019-11-03,2016
1,2019-11-04,3683
2,2019-11-05,1755
3,2019-11-06,3552
4,2019-11-07,4374


In [18]:
# 일자별 발생한 기부횟수
fig = px.bar(df_day_numbers, x='created_date', y='chid', height=500, title='일자별 기부 발생 수')
# fig.show()

### 요일별 기부 발생 수

In [19]:
# 요일 데이터 추가
df_day_numbers['weekday'] = df_day_numbers['created_date'].dt.dayofweek

In [20]:
# 요일별 발생한 기부횟수 구하기
# 0: 월요일, 6: 일요일
df_wday_numbers = df_day_numbers.groupby('weekday')[["chid"]].sum().sort_values('weekday', ascending=True).reset_index()

In [21]:
from pandas import Series, DataFrame # 급 import

In [22]:
# 숫자 요일을 문자 요일로도 만들어줌
df_wday_numbers['wday_name'] = Series(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"], index=[0, 1, 2, 3, 4, 5, 6])
df_wday_numbers

Unnamed: 0,weekday,chid,wday_name
0,0,368806,Monday
1,1,326426,Tuesday
2,2,318133,Wednesday
3,3,329103,Thursday
4,4,308981,Friday
5,5,249157,Saturday
6,6,291071,Sunday


In [23]:
# 요일별 발생한 기부횟수
fig = px.bar(df_wday_numbers, x='wday_name', y='chid', height=500, title='요일별 기부 발생 수')
# fig.show()

## 기부액 총합
### 일자별 기부액 총합

In [24]:
df_day_amount = df.groupby('created_date')[["funds_raised_d", "funds_target_d"]].sum().reset_index()
df_day_amount['funds_raised_d'] = round(df_day_amount['funds_raised_d'])
df_day_amount['funds_target_d'] = round(df_day_amount['funds_target_d'])
df_day_amount.head()

Unnamed: 0,created_date,funds_raised_d,funds_target_d
0,2019-11-03,129331358,166880000.0
1,2019-11-04,339794866,489283830.0
2,2019-11-05,239493924,356661000.0
3,2019-11-06,244217040,269647000.0
4,2019-11-07,363678288,208313414.0


In [25]:
# 일자별 기부액 총합
fig = px.line(df_day_amount, x='created_date', y='funds_raised_d', height=500, title='일자별 기부액 총합')
# fig.show()

In [26]:
# 2020-03-15 에 기부된 금액, 주로 어떤 프로젝트로 갔을까? 코로나19 판데믹 선언과 관련 있을까?
df_0315 = df[df['donated_at']=='2020-03-15'].groupby('title')[["amount_d"]].sum().sort_values('amount_d', ascending=False).reset_index()
df_0315.head(10)

Unnamed: 0,title,amount_d
0,Fight4Fiachra,35802.3504
1,Orange County United Way Pandemic Relief Fund,35100.0
2,Un Respiro per Olbia - Emergenza COVID19,22229.1246
3,#insieme per la terapia intensiva di CISANELLO,22004.5998
4,Help Pomona students impacted by COVID-19,21649.0
5,Amanda Smith Rehabilitation Fund,16645.0
6,COVID-19 Osp.BustoA. e in memoria del dott. St...,15763.512
7,Emergenza Coronavirus - AOU Maggiore della Carit횪,15736.6158
8,emergenza coronavirus - rianimazione Cernusco,15641.8944
9,SOS.Covid19.Portugal,14555.5218


### 케이스 톺아보기 (전체 모금 금액 1st로 높았던 2020-03-15 best 건)
**Fight4Fiachra**
- 3.14에 open한 프로젝트
- 코로나19 관련 X 
- 희귀한 백혈병 환자에 대한 모금
- 3.19부터 11.11까지 9회에 걸친 업데이트로 환자 상태 및 상황 공유
- 펀딩이 열린 8개월 전의 모금액수가 압도적으로 많아 보이며, 현재까지도 (가장 최근 모금 9일 전) 모금 진행중

**Orange County United Way Pandemic Relief Fund**
- 코로나19 관련

**Un Respiro per Olbia - Emergenza COVID19**
- 코로나19 관련

**#insieme per la terapia intensiva di CISANELLO**
- 코로나19 관련

**Help Pomona students impacted by COVID-19**
- 코로나19 관련

In [27]:
# 2020-06-01 에 기부된 금액, 주로 어떤 프로젝트로 갔을까? Black Lives Matter와 관련 있을까?
df_0601 = df[df['donated_at']=='2020-06-01'].groupby('title')[["amount_d"]].sum().sort_values('amount_d', ascending=False).reset_index()
df_0601.head(10)

Unnamed: 0,title,amount_d
0,Supporting Robbie McEachern and Family,46045.0
1,Orange County United Way Pandemic Relief Fund,39751.0
2,Matt Gilliam & Family Memorial Fund,39570.0
3,#TeamDessino,34155.0
4,Help Us Rebuild Sunny Optometry Destroyed by R...,30311.0
5,BKShowsLove Emergency Fund to Feed Brooklynites,26301.0
6,FRIENDS OF FLOYD,24333.0
7,Covid took Sarah from her beloved children.,22823.0
8,Lloyd's Pharmacy Rebuilding Fund - St. Paul Riots,16785.0
9,Hack the System for Social Justice,16570.0


### 케이스 톺아보기 (전체 모금 금액 3rd로 높았던 2020-06-01 best 건)
(참고) On May 25, 2020, George Floyd, a 46-year-old black man, was killed in Minneapolis, Minnesota, while being arrested for allegedly using a counterfeit bill.

**Supporting Robbie McEachern and Family**
- 암에 걸린 남성에 대한 펀딩
- 2020-05-31에 업로드되었고, 다음날인 2020-06-01에 압도적으로 많은 기부 들어옴
- 총 기부금액(11.24 홈피 기준) $167,980. 전체 금액 기준으로 봐도 첫날에 25% 이상 들어온 것 확인
- 2020-06-02에 감사 업데이트, 2020-06-19에 사망했다는 내용의 업데이트(사망일 6.18)가 있음

**Orange County United Way Pandemic Relief Fund**
- 코로나19 케이스와 동일 (스테디셀러)

**Matt Gilliam & Family Memorial Fund**
- 사망한 가장의 가족을 위한 펀딩
- 6.1에 시작했고 대부분의 금액이 6.5까지 모금되어 6.5에 감사인사 업데이트
- 하지만 현재까지 목표금액인 150000을 채우지는 못함, 그래도 꽤 많은 금액(131020)

**#TeamDessino**
- 사망하게 된 한 남성에 대한 스토리
- 인물에 대한 맥락 소개가 자세함
- 5.31에 업로드했고 6.2에 업데이트 2회 있었음
- 목표금액 100000를 101075로 달성

**Help Us Rebuild Sunny Optometry Destroyed by Riots**
- 폭도들(?)이 병원을 침입하여 각종 장비들을 훼손하고 약탈한 상황으로 보임
- 전체 250000 중 87531만 달성
- 장문의 스토리와 유튜브 링크도 몇 개 들어가있음

# 타이틀 특정 문구(예. COVID, Coronavirus, Black 등) 별로 콘텐츠 분류 가능한지 확인
해보지는 않았고, 시도가 가능하다 정도만 남겨둠

## 코로나19 관련
- COVID
- Corona

In [28]:
df[df['title'].str.contains('COVID')].groupby('title')[["amount_d"]].sum().sort_values('amount_d', ascending=False).reset_index()

Unnamed: 0,title,amount_d
0,Un Respiro per Olbia - Emergenza COVID19,422436.3948
1,COVID-19: PW Residents Need Our Help NOW,323574.0000
2,Pros For Heroes COVID-19 Relief Fund,213835.0000
3,MN Immigrant Families COVID-19 Fund,201673.0000
4,MillionLives.org : COVID-19 Global Support Fund,197260.0000
...,...,...
135,COVID-2019 Community Response - Food Security,14215.0000
136,COVID-19:Help Feed the Most Vulnerable in Oakl...,14034.0000
137,Ethio-STL COVID-19 Fund Raising,14010.0000
138,COVID-19 Indigenous Seed Crisis Response Circle,13936.0000


In [29]:
df[df['title'].str.contains('Corona')].groupby('title')[["amount_d"]].sum().sort_values('amount_d', ascending=False).reset_index()

Unnamed: 0,title,amount_d
0,India Coronavirus Crisis,168787.9369
1,Emergenza Coronavirus Marche #NoiSiamoLeMarche,164004.8418
2,"SOS Coronavirus, fondi per la terapia intensiva",149065.7568
3,Coronavirus a Pisa: sosteniamo il nostro osped...,142333.521
4,Help Fight Corona Virus in the Algarve,127850.502
5,"Coronavirus, ""Che l'inse?""",112764.0726
6,Shlomi��셲 Corona fight,112373.0
7,Emergenza Coronavirus - AOU Maggiore della Carit횪,102095.6364
8,Coronavirus - fondi per Policlinico Modena,68264.8944
9,Ospedale Santo Spirito VS Corona Virus,67530.5112


## BlackLivesMatter 관련
- FLOYD
- Matter
- MATTER

In [30]:
df[df['title'].str.contains('FLOYD')].groupby('title')[["amount_d"]].sum().reset_index()

Unnamed: 0,title,amount_d
0,FRIENDS OF FLOYD,132935.0


In [31]:
df[df['title'].str.contains('Matter')].groupby('title')[["amount_d"]].sum().reset_index()

Unnamed: 0,title,amount_d
0,Black Lives Matter,104474.0
1,Black Lives Matter Walk Fund,15572.0
2,Black Minds Matter BMM,21721.69
3,Black Minds Matter UK,84853.7659
4,Black Trans Lives Matter Youth Fund,82931.0
5,Carbon People of Color ERG- Black Lives Matter,30568.0
6,Everesting for Black Lives Matter via NAACP,27304.0
7,Ismailis for Black Lives Matter,23138.0
8,Lift Every Voice for #BlackLivesMatter,13770.0
9,Our Black Legacy Matters: Wilfandel Preservation,19010.0


In [32]:
df[df['title'].str.contains('MATTER')].groupby('title')[["amount_d"]].sum().reset_index()

Unnamed: 0,title,amount_d
0,Alun's campaign for MEN MATTER SCOTLAND,26703.371
1,HORROR WRITERS FOR BLACK LIVES MATTER,25095.0


# 분야별 기부횟수, 기부액 비교
## 데이터 정리

In [33]:
general_original.columns

Index(['Unnamed: 0', 'chid', 'url', 'scrape_date', 'title', 'created_date',
       'funds_raised', 'funds_target', 'picture', 'org_picture',
       'counts_status_code', 'velocity_status_code', 'updates_status_code',
       'comments_status_code', 'donations_status_code', 'currency', 'tags',
       'story', 'receiver_name', 'receiver_tax', 'total_photos',
       'total_co_photos', 'total_community_photos', 'total_comments',
       'total_updates', 'total_donations', 'total_unique_donors',
       'amount_raised_unattributed', 'number_of_donations_unattributed',
       'campaign_hearts', 'social_share_total', 'num_recent_donations',
       'recent_time_window', 'funds_raised_dollar', 'funds_target_dollar',
       'amount_raised_unattributed_dollar', 'char_age', 'success',
       'dum_success'],
      dtype='object')

In [34]:
by_category1 = general_original.groupby('tags')[["chid", "funds_raised", "funds_target", "social_share_total"]]
by_category2 = by_category1.agg({'chid':'count','funds_raised':'mean','funds_target':'mean','social_share_total':'mean'}).sort_values('chid', ascending=False).reset_index()
by_category2.rename(columns={'tags':'category', 'chid':'case', 'funds_raised':'avg_raised', 'funds_target':'avg_target', 'social_share_total':'avg_share'}, inplace=True)



In [35]:
by_category2.avg_raised = round(by_category2.avg_raised, 1)
by_category2.avg_target = round(by_category2.avg_target, 1)
by_category2.avg_share = round(by_category2.avg_share)

by_category2.head()

Unnamed: 0,category,case,avg_raised,avg_target,avg_share
0,"Medical, Illness & Healing",1117,111014.3,203147.6,3516.0
1,Accidents & Emergencies,1104,84381.2,125270.3,3485.0
2,Funerals & Memorials,1030,67879.5,79981.3,3661.0
3,Community & Neighbors,193,68186.7,153299.5,833.0
4,Education & Learning,97,31174.6,61866.0,596.0


In [36]:
by_category3 = by_category2.assign(rate = round(by_category2['case']/by_category2.case.sum() * 100, 2))
by_category3.columns

Index(['category', 'case', 'avg_raised', 'avg_target', 'avg_share', 'rate'], dtype='object')

## (미사용) 분야별 케이스 수, 비율, 평균 모금액, 평균 목표액, 평균 공유 수 및 분포

In [37]:
category = by_category3.reindex(columns=['category', 'case', 'rate', 'avg_raised', 'avg_target', 'avg_share'])
category.sort_values('rate', ascending=False)

Unnamed: 0,category,case,rate,avg_raised,avg_target,avg_share
0,"Medical, Illness & Healing",1117,28.61,111014.3,203147.6,3516.0
1,Accidents & Emergencies,1104,28.28,84381.2,125270.3,3485.0
2,Funerals & Memorials,1030,26.38,67879.5,79981.3,3661.0
3,Community & Neighbors,193,4.94,68186.7,153299.5,833.0
4,Education & Learning,97,2.48,31174.6,61866.0,596.0
5,Other,72,1.84,39411.8,80830.6,820.0
6,Volunteer & Service,58,1.49,44146.0,80947.9,773.0
7,Animals & Pets,52,1.33,47403.4,75738.5,2307.0
8,"Missions, Faith & Church",48,1.23,33054.3,115860.3,823.0
9,"Creative Arts, Music & Film",36,0.92,29141.5,45922.2,913.0


In [38]:
fig = px.pie(category, values='rate', names='category', hover_data=['case'], title='기부횟수 분야별 비율')
# fig.show()

In [39]:
fig = px.bar(category, x='category', y='case', height=500, title='분야별 기부횟수')
# fig.show()

## TOP 3 + a 각각의 기부금액, 타겟금액, 기부자수 분포 그려보기
- Medical, Illness & Healing	
- Accidents & Emergencies	
- Funerals & Memorials	
- Community & Neighbors	
- Education & Learning

### 기부금액 분포

In [172]:
# general 테이블에서 필요한 컬럼만 추출
general = general_original[['chid','title','created_date','funds_raised_dollar', 'funds_target_dollar', 'amount_raised_unattributed_dollar', 'category', 'campaign_hearts', 'total_donations', 'success', 'group_category']]
gnr = general.query('total_donations < 1089')
gnr = general.query('funds_raised_dollar < 2000000')
gnr = general.query('funds_target_dollar < 4000000')
gnr.head()

Unnamed: 0,chid,title,created_date,funds_raised_dollar,funds_target_dollar,amount_raised_unattributed_dollar,category,campaign_hearts,total_donations,success,group_category
0,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,0.0,Funerals & Memorials,1691.0,1705.0,False,Funerals & Memorials
1,2,1 percent chance,2020-06-21,33792,226000.0,0.0,"Medical, Illness & Healing",542.0,602.0,False,"Medical, Illness & Healing"
2,3,MELS needs #100ComputersIn100Days,2020-06-19,13845,30000.0,8196.0,Education & Learning,119.0,125.0,False,Others
3,4,100% to Victims of Taal Volcano in Philippines,2020-01-16,25,5000.0,0.0,Accidents & Emergencies,1.0,1.0,False,Accidents & Emergencies
4,5,$100k for Center for Family Life,2020-11-02,13950,100000.0,1100.0,Community & Neighbors,17.0,17.0,False,Others


In [164]:
medical = gnr[gnr['category'] == 'Medical, Illness & Healing']
accidents = gnr[gnr['category'] == 'Accidents & Emergencies']
funerals = gnr[gnr['category'] == 'Funerals & Memorials']
community = gnr[gnr['category'] == 'Community & Neighbors']
education = gnr[gnr['category'] == 'Education & Learning']

In [198]:
'''
medical_date = medical.groupby('created_date')[['funds_raised_dollar', 'funds_target_dollar', 'campaign_hearts', 'campaign_hearts']].mean().sort_values('created_date', ascending=True).reset_index()
fig = px.line(medical_date, x='created_date', y='funds_raised_dollar', height=500)
fig.show()
'''

"\nmedical_date = medical.groupby('created_date')[['funds_raised_dollar', 'funds_target_dollar', 'campaign_hearts', 'campaign_hearts']].mean().sort_values('created_date', ascending=True).reset_index()\nfig = px.line(medical_date, x='created_date', y='funds_raised_dollar', height=500)\nfig.show()\n"

# 랜덤 프로젝트 펀딩 추이 살펴보기

In [115]:
# [주의: 건드리지 마시오] 여기서 엔터 누르면 다 바뀌는 거야... 주옥되는 거야...
# 카테고리별로 1개씩 샘플링
# chid: 271, 1655, 2454, 2332
s1 = medical.sample(1)
s2 = accidents.sample(1)
s3 = funerals.sample(1)
s4 = others.sample(1)

In [133]:
# 보기 편하게 한 테이블로 모아봅니다. 시각화는 따로 합니다.
ss = pd.concat([s1, s2, s3, s4])
ss

Unnamed: 0,chid,title,created_date,funds_raised_dollar,funds_target_dollar,amount_raised_unattributed_dollar,category,campaign_hearts,total_donations,success,group_category
270,271,"At 7, Callum is having to relearn everything.",2020-09-11,106475,100000.0,106475.0,"Medical, Illness & Healing",782.0,798.0,True,"Medical, Illness & Healing"
1653,1655,Help rebuild Kendra & Mike's home,2020-08-23,55005,50000.0,48147.0,Accidents & Emergencies,264.0,268.0,True,Accidents & Emergencies
2452,2454,Mark Remolino,2020-05-04,64163,50000.0,0.0,Funerals & Memorials,312.0,316.0,True,Funerals & Memorials
2330,2332,Leslie's campaign for Peter Pan Foundation,2020-02-20,16230,200000.0,10255.0,Volunteer & Service,102.0,120.0,False,Others


In [329]:
# 개별 프로젝트 일자별 펀딩 추이
s1_ = pd.merge(s1, donation, how = 'inner', on = 'chid').groupby('donated_at')[["amount_d", "chid"]]
s1_daily = s1_.agg({'amount_d':'sum','chid':'count'}).sort_values('donated_at', ascending=True).reset_index()
fig = px.line(s1_daily, x='donated_at', y='chid', height=500, title='At 7, Callum is having to relearn everything.')
fig.show()

In [330]:
# 개별 프로젝트 일자별 펀딩 추이
s2_ = pd.merge(s2, donation, how = 'inner', on = 'chid').groupby('donated_at')[["amount_d", "chid"]]
s2_daily = s2_.agg({'amount_d':'sum','chid':'count'}).sort_values('donated_at', ascending=True).reset_index()
fig = px.line(s2_daily, x='donated_at', y='chid', height=500, title="Help rebuild Kendra & Mike's home")
fig.show()

In [331]:
# 개별 프로젝트 일자별 펀딩 추이
s3_ = pd.merge(s3, donation, how = 'inner', on = 'chid').groupby('donated_at')[["amount_d", "chid"]]
s3_daily = s3_.agg({'amount_d':'sum','chid':'count'}).sort_values('donated_at', ascending=True).reset_index()
fig = px.line(s3_daily, x='donated_at', y='chid', height=500, title='Mark Remolino')
fig.show()

In [332]:
# 개별 프로젝트 일자별 펀딩 추이
s4_ = pd.merge(s4, donation, how = 'inner', on = 'chid').groupby('donated_at')[["amount_d", "chid"]]
s4_daily = s4_.agg({'amount_d':'sum','chid':'count'}).sort_values('donated_at', ascending=True).reset_index()
fig = px.line(s4_daily, x='donated_at', y='chid', height=500, title="Leslie's campaign for Peter Pan Foundation")
fig.show()

In [333]:
# [주의: 건드리지 마시오] 여기서 엔터 누르면 다 바뀌는 거야... 주옥되는 거야...
# 카테고리별로 10개씩 샘플링
a1 = medical.sample(10)
a2 = accidents.sample(10)
a3 = funerals.sample(10)
a4 = others.sample(10)

# Organizer location를 회귀분석 요소에 추가하기

In [135]:
organizer_original

Unnamed: 0,chid,org_id,org_name,org_type,location,raised,num_donations,currency,raised_dollar
0,1,0,Jared Thomas,Organizer,"Indianapolis, IN",,,dollar,
1,1,1,Chris Beaty Memorial Fund,Beneficiary,,,,dollar,
2,2,2,Saha Heirati,Organizer,,,,krona,
3,3,3,Gus Jacobson,Organizer,"Forest Hills, NY",,,dollar,
4,3,4,Kathleen Scarpa,Team member,,,,dollar,
...,...,...,...,...,...,...,...,...,...
8199,3903,8199,Kelly Culver,Beneficiary,,,,dollar,
8200,3904,8200,Sarah Manoucheri,Organizer,"Glen Cove, NY",,,dollar,
8201,3904,8201,Ineta Yaghoubian,Beneficiary,,,,dollar,
8202,3905,8202,Family And Friends Of Carmella Rossi,Organizer,"Buffalo, NY",,,dollar,


In [139]:
organizer_original.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8204 entries, 0 to 8203
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   chid           8204 non-null   int64  
 1   org_id         8204 non-null   int64  
 2   org_name       8204 non-null   object 
 3   org_type       8204 non-null   object 
 4   location       3770 non-null   object 
 5   raised         2279 non-null   float64
 6   num_donations  2279 non-null   object 
 7   currency       8204 non-null   object 
 8   raised_dollar  2279 non-null   float64
dtypes: float64(2), int64(2), object(5)
memory usage: 577.0+ KB


In [143]:
organizer_original

Unnamed: 0,chid,org_id,org_name,org_type,location,raised,num_donations,currency,raised_dollar
0,1,0,Jared Thomas,Organizer,"Indianapolis, IN",,,dollar,
1,1,1,Chris Beaty Memorial Fund,Beneficiary,,,,dollar,
2,2,2,Saha Heirati,Organizer,,,,krona,
3,3,3,Gus Jacobson,Organizer,"Forest Hills, NY",,,dollar,
4,3,4,Kathleen Scarpa,Team member,,,,dollar,
...,...,...,...,...,...,...,...,...,...
8199,3903,8199,Kelly Culver,Beneficiary,,,,dollar,
8200,3904,8200,Sarah Manoucheri,Organizer,"Glen Cove, NY",,,dollar,
8201,3904,8201,Ineta Yaghoubian,Beneficiary,,,,dollar,
8202,3905,8202,Family And Friends Of Carmella Rossi,Organizer,"Buffalo, NY",,,dollar,


In [154]:
# organizer의 경우 location 정보가 전체 3904 중 3770개 있음 (274만 없음)
organizer_original[organizer_original['org_type'] == 'Organizer'].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3904 entries, 0 to 8202
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   chid           3904 non-null   int64  
 1   org_id         3904 non-null   int64  
 2   org_name       3904 non-null   object 
 3   org_type       3904 non-null   object 
 4   location       3770 non-null   object 
 5   raised         759 non-null    float64
 6   num_donations  759 non-null    object 
 7   currency       3904 non-null   object 
 8   raised_dollar  759 non-null    float64
dtypes: float64(2), int64(2), object(5)
memory usage: 305.0+ KB


In [150]:
organizer_original[organizer_original['org_type'] == 'Beneficiary'].shape

(2055, 9)

In [151]:
organizer_original[organizer_original['org_type'] == 'Team member'].shape

(2245, 9)

In [152]:
# organizer가 사는 지역에 따라 펀딩 성공 여부가 달라지는지

Unnamed: 0,chid,org_id,org_name,org_type,location,raised,num_donations,currency,raised_dollar
0,1,0,Jared Thomas,Organizer,"Indianapolis, IN",,,dollar,
1,1,1,Chris Beaty Memorial Fund,Beneficiary,,,,dollar,
2,2,2,Saha Heirati,Organizer,,,,krona,
3,3,3,Gus Jacobson,Organizer,"Forest Hills, NY",,,dollar,
4,3,4,Kathleen Scarpa,Team member,,,,dollar,
5,4,5,Gordon Montgomery,Organizer,,,,dollar,
6,5,6,Puneet Pardasani,Organizer,"New York, NY",,,dollar,
7,6,7,Ruben T Garcia,Organizer,"San Antonio, TX",,,dollar,
8,7,8,Jude Bernard,Organizer,"Brooklyn, NY",12270.0,129.0,dollar,12270.0
9,7,9,Beatrice Oscar,Team member,,2895.0,44.0,dollar,2895.0


# Social Share가 코로나에 의해 영향을 받은 것 같다
## 코로나 전 4개월, 코로나 4개월, 코로나 지속 4개월로 그룹을 나누어 살펴보자

In [184]:
from statsmodels.formula.api import ols
from statsmodels.formula.api import logit

In [229]:
#원하는 컬럼만 추출
general = general_original[['chid', 'title', 'created_date', 'funds_raised', 'funds_target', 'picture', 'org_picture', 'category', 'story', 'total_photos', 'total_co_photos', 'total_community_photos', 'total_updates', 'total_donations', 'total_unique_donors', 'amount_raised_unattributed', 'number_of_donations_unattributed', 'campaign_hearts', 'social_share_total', 'funds_raised_dollar', 'funds_target_dollar', 'char_age', 'success', 'dum_success', 'total_comments', 'update_counts']]

In [194]:
# 아래 방법은 멋있으나 두 가지 분기밖에 안 되어
# general['period'] = ["before_covid" if s <= '2020-02-28' else "after_covid" for s in general['created_date']] 

In [237]:
conditionlist = [
    (general['created_date'] <= '2020-02-28') ,
    (general['created_date'] >= '2020-03-01') & (general['created_date'] <= '2020-06-30'),
    (general['created_date'] >= '2020-07-01') & (general['created_date'] <= '2020-11-04')]
choicelist = ['before_covid', 'during_covid', 'after_covid']
general['period'] = np.select(conditionlist, choicelist, default='Not Specified')

In [238]:
general.head()

Unnamed: 0,chid,title,created_date,funds_raised,funds_target,picture,org_picture,category,story,total_photos,...,campaign_hearts,social_share_total,funds_raised_dollar,funds_target_dollar,char_age,success,dum_success,total_comments,update_counts,period
0,1,Chris Beaty Memorial Fund,2020-06-03,175083.0,200000.0,True,False,Funerals & Memorials,This is the official Chris Beaty GoFundMe crea...,3.0,...,1691.0,9069.0,175083,200000.0,153,False,0,74.0,1.0,during_covid
1,2,1 percent chance,2020-06-21,299042.0,2000000.0,True,False,"Medical, Illness & Healing",Shortly about Saha: | It is with a heavy heart...,1.0,...,542.0,1882.0,33792,226000.0,135,False,0,0.0,0.0,during_covid
2,3,MELS needs #100ComputersIn100Days,2020-06-19,13845.0,30000.0,True,True,Education & Learning,Cost Breakdown | 100 Computers x approximately...,2.0,...,119.0,383.0,13845,30000.0,138,False,0,7.0,4.0,during_covid
3,4,100% to Victims of Taal Volcano in Philippines,2020-01-16,25.0,5000.0,True,True,Accidents & Emergencies,I was caught within 7 miles of the Taal Volcan...,1.0,...,1.0,0.0,25,5000.0,292,False,0,0.0,0.0,before_covid
4,5,$100k for Center for Family Life,2020-11-02,13950.0,100000.0,True,False,Community & Neighbors,I have been on the board of the Center for Fam...,1.0,...,17.0,0.0,13950,100000.0,2,False,0,1.0,0.0,after_covid


In [239]:
res = logit('dum_success ~ total_photos + total_updates + campaign_hearts + social_share_total + char_age + total_comments', general).fit()

Optimization terminated successfully.
         Current function value: 0.672681
         Iterations 5


In [240]:
bc = general[general['period'] == 'before_covid']
dc = general[general['period'] == 'during_covid']
ac = general[general['period'] == 'after_covid']

In [307]:
res_bc = logit('dum_success ~ total_photos + total_updates + campaign_hearts + social_share_total + char_age + total_comments', bc).fit()
res_bc.summary()

Optimization terminated successfully.
         Current function value: 0.670815
         Iterations 5


0,1,2,3
Dep. Variable:,dum_success,No. Observations:,765.0
Model:,Logit,Df Residuals:,758.0
Method:,MLE,Df Model:,6.0
Date:,"Tue, 01 Dec 2020",Pseudo R-squ.:,0.01832
Time:,21:06:47,Log-Likelihood:,-513.17
converged:,True,LL-Null:,-522.75
Covariance Type:,nonrobust,LLR p-value:,0.003912

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.4671,0.735,-0.636,0.525,-1.907,0.973
total_photos,-0.0148,0.012,-1.220,0.222,-0.039,0.009
total_updates,-0.0225,0.016,-1.415,0.157,-0.054,0.009
campaign_hearts,-4.74e-05,8.46e-05,-0.560,0.575,-0.000,0.000
social_share_total,1.428e-05,1.91e-05,0.747,0.455,-2.32e-05,5.17e-05
char_age,0.0003,0.002,0.135,0.893,-0.004,0.005
total_comments,0.0034,0.002,2.218,0.027,0.000,0.006


In [306]:
res_dc = logit('dum_success ~ total_photos + total_updates + campaign_hearts + social_share_total + char_age + total_comments', dc).fit()
res_dc.summary()

Optimization terminated successfully.
         Current function value: 0.667871
         Iterations 5


0,1,2,3
Dep. Variable:,dum_success,No. Observations:,1626.0
Model:,Logit,Df Residuals:,1619.0
Method:,MLE,Df Model:,6.0
Date:,"Tue, 01 Dec 2020",Pseudo R-squ.:,0.006373
Time:,21:06:30,Log-Likelihood:,-1086.0
converged:,True,LL-Null:,-1092.9
Covariance Type:,nonrobust,LLR p-value:,0.03041

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0067,0.286,-0.023,0.981,-0.568,0.554
total_photos,-0.0065,0.008,-0.849,0.396,-0.022,0.009
total_updates,-0.0100,0.011,-0.929,0.353,-0.031,0.011
campaign_hearts,7.498e-06,2.13e-05,0.353,0.724,-3.42e-05,4.92e-05
social_share_total,8.889e-06,8.24e-06,1.079,0.281,-7.26e-06,2.5e-05
char_age,-0.0022,0.001,-1.481,0.139,-0.005,0.001
total_comments,0.0013,0.001,1.075,0.282,-0.001,0.004


In [305]:
res_ac = logit('dum_success ~ total_photos + total_updates + campaign_hearts + social_share_total + char_age + total_comments', ac).fit()
res_ac.summary()

Optimization terminated successfully.
         Current function value: 0.672189
         Iterations 6


0,1,2,3
Dep. Variable:,dum_success,No. Observations:,1438.0
Model:,Logit,Df Residuals:,1431.0
Method:,MLE,Df Model:,6.0
Date:,"Tue, 01 Dec 2020",Pseudo R-squ.:,0.006308
Time:,21:05:57,Log-Likelihood:,-966.61
converged:,True,LL-Null:,-972.74
Covariance Type:,nonrobust,LLR p-value:,0.05615

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.5909,0.129,-4.569,0.000,-0.844,-0.337
total_photos,-0.0016,0.005,-0.292,0.770,-0.012,0.009
total_updates,-0.0137,0.011,-1.237,0.216,-0.035,0.008
campaign_hearts,-8.316e-05,6.18e-05,-1.345,0.179,-0.000,3.8e-05
social_share_total,-1.4e-05,1.47e-05,-0.954,0.340,-4.28e-05,1.48e-05
char_age,0.0038,0.002,2.386,0.017,0.001,0.007
total_comments,0.0033,0.002,1.749,0.080,-0.000,0.007


In [244]:
ols_bc = ols('funds_raised_dollar ~ total_photos + total_updates + campaign_hearts + social_share_total + char_age + total_comments', bc).fit()
ols_bc.summary()

0,1,2,3
Dep. Variable:,funds_raised_dollar,R-squared:,0.51
Model:,OLS,Adj. R-squared:,0.506
Method:,Least Squares,F-statistic:,131.3
Date:,"Tue, 01 Dec 2020",Prob (F-statistic):,9.22e-114
Time:,11:34:19,Log-Likelihood:,-9153.6
No. Observations:,765,AIC:,18320.0
Df Residuals:,758,BIC:,18350.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.425e+04,1.37e+04,4.681,0.000,3.73e+04,9.12e+04
total_photos,-36.5854,194.871,-0.188,0.851,-419.137,345.966
total_updates,591.1179,247.802,2.385,0.017,104.657,1077.579
campaign_hearts,22.4367,1.567,14.319,0.000,19.361,25.513
social_share_total,-0.7044,0.345,-2.044,0.041,-1.381,-0.028
char_age,-87.5425,44.637,-1.961,0.050,-175.169,0.084
total_comments,239.7568,28.256,8.485,0.000,184.288,295.226

0,1,2,3
Omnibus:,345.634,Durbin-Watson:,1.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2090.758
Skew:,1.965,Prob(JB):,0.0
Kurtosis:,10.082,Cond. No.,60700.0


In [245]:
ols_dc = ols('funds_raised_dollar ~ total_photos + total_updates + campaign_hearts + social_share_total + char_age + total_comments', dc).fit()
ols_dc.summary()

0,1,2,3
Dep. Variable:,funds_raised_dollar,R-squared:,0.534
Model:,OLS,Adj. R-squared:,0.532
Method:,Least Squares,F-statistic:,309.4
Date:,"Tue, 01 Dec 2020",Prob (F-statistic):,2.69e-264
Time:,11:34:20,Log-Likelihood:,-21774.0
No. Observations:,1626,AIC:,43560.0
Df Residuals:,1619,BIC:,43600.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-5.412e+04,2.22e+04,-2.432,0.015,-9.78e+04,-1.05e+04
total_photos,537.3782,497.118,1.081,0.280,-437.684,1512.440
total_updates,985.4890,788.711,1.249,0.212,-561.512,2532.490
campaign_hearts,46.6882,1.630,28.641,0.000,43.491,49.886
social_share_total,3.5200,0.633,5.557,0.000,2.277,4.763
char_age,452.8998,114.354,3.960,0.000,228.601,677.198
total_comments,-309.3083,92.083,-3.359,0.001,-489.922,-128.694

0,1,2,3
Omnibus:,3707.772,Durbin-Watson:,2.002
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28951847.85
Skew:,20.72,Prob(JB):,0.0
Kurtosis:,655.393,Cond. No.,47900.0


In [246]:
ols_ac = ols('funds_raised_dollar ~ total_photos + total_updates + campaign_hearts + social_share_total + char_age + total_comments', ac).fit()
ols_ac.summary()

0,1,2,3
Dep. Variable:,funds_raised_dollar,R-squared:,0.698
Model:,OLS,Adj. R-squared:,0.697
Method:,Least Squares,F-statistic:,551.9
Date:,"Tue, 01 Dec 2020",Prob (F-statistic):,0.0
Time:,11:34:21,Log-Likelihood:,-17981.0
No. Observations:,1438,AIC:,35980.0
Df Residuals:,1431,BIC:,36010.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.113e+04,4098.900,7.595,0.000,2.31e+04,3.92e+04
total_photos,-488.2830,170.449,-2.865,0.004,-822.639,-153.927
total_updates,587.8643,331.994,1.771,0.077,-63.383,1239.111
campaign_hearts,38.8924,1.000,38.898,0.000,36.931,40.854
social_share_total,-2.5084,0.440,-5.697,0.000,-3.372,-1.645
char_age,56.8770,51.208,1.111,0.267,-43.574,157.328
total_comments,473.2888,39.890,11.865,0.000,395.040,551.537

0,1,2,3
Omnibus:,1902.626,Durbin-Watson:,2.111
Prob(Omnibus):,0.0,Jarque-Bera (JB):,544487.484
Skew:,7.0,Prob(JB):,0.0
Kurtosis:,97.294,Cond. No.,15800.0


In [311]:
general.head()

Unnamed: 0,chid,title,created_date,funds_raised,funds_target,picture,org_picture,category,story,total_photos,...,campaign_hearts,social_share_total,funds_raised_dollar,funds_target_dollar,char_age,success,dum_success,total_comments,update_counts,period
0,1,Chris Beaty Memorial Fund,2020-06-03,175083.0,200000.0,True,False,Funerals & Memorials,This is the official Chris Beaty GoFundMe crea...,3.0,...,1691.0,9069.0,175083,200000.0,153,False,0,74.0,1.0,during_covid
1,2,1 percent chance,2020-06-21,299042.0,2000000.0,True,False,"Medical, Illness & Healing",Shortly about Saha: | It is with a heavy heart...,1.0,...,542.0,1882.0,33792,226000.0,135,False,0,0.0,0.0,during_covid
2,3,MELS needs #100ComputersIn100Days,2020-06-19,13845.0,30000.0,True,True,Education & Learning,Cost Breakdown | 100 Computers x approximately...,2.0,...,119.0,383.0,13845,30000.0,138,False,0,7.0,4.0,during_covid
3,4,100% to Victims of Taal Volcano in Philippines,2020-01-16,25.0,5000.0,True,True,Accidents & Emergencies,I was caught within 7 miles of the Taal Volcan...,1.0,...,1.0,0.0,25,5000.0,292,False,0,0.0,0.0,before_covid
4,5,$100k for Center for Family Life,2020-11-02,13950.0,100000.0,True,False,Community & Neighbors,I have been on the board of the Center for Fam...,1.0,...,17.0,0.0,13950,100000.0,2,False,0,1.0,0.0,after_covid


In [316]:
general['created_month'] = general['created_date'].str[:7]
general.head()

Unnamed: 0,chid,title,created_date,funds_raised,funds_target,picture,org_picture,category,story,total_photos,...,social_share_total,funds_raised_dollar,funds_target_dollar,char_age,success,dum_success,total_comments,update_counts,period,created_month
0,1,Chris Beaty Memorial Fund,2020-06-03,175083.0,200000.0,True,False,Funerals & Memorials,This is the official Chris Beaty GoFundMe crea...,3.0,...,9069.0,175083,200000.0,153,False,0,74.0,1.0,during_covid,2020-06
1,2,1 percent chance,2020-06-21,299042.0,2000000.0,True,False,"Medical, Illness & Healing",Shortly about Saha: | It is with a heavy heart...,1.0,...,1882.0,33792,226000.0,135,False,0,0.0,0.0,during_covid,2020-06
2,3,MELS needs #100ComputersIn100Days,2020-06-19,13845.0,30000.0,True,True,Education & Learning,Cost Breakdown | 100 Computers x approximately...,2.0,...,383.0,13845,30000.0,138,False,0,7.0,4.0,during_covid,2020-06
3,4,100% to Victims of Taal Volcano in Philippines,2020-01-16,25.0,5000.0,True,True,Accidents & Emergencies,I was caught within 7 miles of the Taal Volcan...,1.0,...,0.0,25,5000.0,292,False,0,0.0,0.0,before_covid,2020-01
4,5,$100k for Center for Family Life,2020-11-02,13950.0,100000.0,True,False,Community & Neighbors,I have been on the board of the Center for Fam...,1.0,...,0.0,13950,100000.0,2,False,0,1.0,0.0,after_covid,2020-11


In [323]:
o = general.groupby('created_month')[['chid']].count().reset_index()
o

Unnamed: 0,created_month,chid
0,2019-11,169
1,2019-12,185
2,2020-01,269
3,2020-02,149
4,2020-03,488
5,2020-04,423
6,2020-05,329
7,2020-06,389
8,2020-07,390
9,2020-08,444


In [327]:
fig = px.bar(o, x='created_month', y='chid', height=500, title='월별 기부 프로젝트 수')
fig.show()

In [325]:
o.chid.sum()

3847

In [326]:
general.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3847 entries, 0 to 3903
Data columns (total 28 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   chid                              3847 non-null   int64  
 1   title                             3847 non-null   object 
 2   created_date                      3847 non-null   object 
 3   funds_raised                      3847 non-null   float64
 4   funds_target                      3847 non-null   float64
 5   picture                           3847 non-null   bool   
 6   org_picture                       3847 non-null   bool   
 7   category                          3847 non-null   object 
 8   story                             3842 non-null   object 
 9   total_photos                      3821 non-null   float64
 10  total_co_photos                   3821 non-null   float64
 11  total_community_photos            3821 non-null   float64
 12  total_

# 카테고리별 기부액수 기부건수 성공비율

In [251]:
general

Unnamed: 0,chid,title,created_date,funds_raised,funds_target,picture,org_picture,category,story,total_photos,...,campaign_hearts,social_share_total,funds_raised_dollar,funds_target_dollar,char_age,success,dum_success,total_comments,update_counts,period
0,1,Chris Beaty Memorial Fund,2020-06-03,175083.0,200000.0,True,False,Funerals & Memorials,This is the official Chris Beaty GoFundMe crea...,3.0,...,1691.0,9069.0,175083,200000.0,153,False,0,74.0,1.0,during_covid
1,2,1 percent chance,2020-06-21,299042.0,2000000.0,True,False,"Medical, Illness & Healing",Shortly about Saha: | It is with a heavy heart...,1.0,...,542.0,1882.0,33792,226000.0,135,False,0,0.0,0.0,during_covid
2,3,MELS needs #100ComputersIn100Days,2020-06-19,13845.0,30000.0,True,True,Education & Learning,Cost Breakdown | 100 Computers x approximately...,2.0,...,119.0,383.0,13845,30000.0,138,False,0,7.0,4.0,during_covid
3,4,100% to Victims of Taal Volcano in Philippines,2020-01-16,25.0,5000.0,True,True,Accidents & Emergencies,I was caught within 7 miles of the Taal Volcan...,1.0,...,1.0,0.0,25,5000.0,292,False,0,0.0,0.0,before_covid
4,5,$100k for Center for Family Life,2020-11-02,13950.0,100000.0,True,False,Community & Neighbors,I have been on the board of the Center for Fam...,1.0,...,17.0,0.0,13950,100000.0,2,False,0,1.0,0.0,after_covid


In [253]:
general[['chid','title','created_date','funds_raised_dollar', 'funds_target_dollar', 'category', 'campaign_hearts', 'total_donations', 'success']]

Unnamed: 0,chid,title,created_date,funds_raised_dollar,funds_target_dollar,category,campaign_hearts,total_donations,success
0,1,Chris Beaty Memorial Fund,2020-06-03,175083,200000.0,Funerals & Memorials,1691.0,1705.0,False
1,2,1 percent chance,2020-06-21,33792,226000.0,"Medical, Illness & Healing",542.0,602.0,False
2,3,MELS needs #100ComputersIn100Days,2020-06-19,13845,30000.0,Education & Learning,119.0,125.0,False
3,4,100% to Victims of Taal Volcano in Philippines,2020-01-16,25,5000.0,Accidents & Emergencies,1.0,1.0,False
4,5,$100k for Center for Family Life,2020-11-02,13950,100000.0,Community & Neighbors,17.0,17.0,False
...,...,...,...,...,...,...,...,...,...
3899,3901,Helping the Hansens: A Family in Crisis,2020-07-24,68691,15000.0,"Medical, Illness & Healing",636.0,673.0,True
3900,3902,Zurn Strong,2020-10-13,41330,20000.0,Funerals & Memorials,348.0,356.0,True
3901,3903,Sean Culver recovery and rehab fund,2020-08-28,59205,50000.0,"Medical, Illness & Healing",559.0,562.0,True
3902,3904,In loving memory of Koorosh (Cyrus) Yaghoubian,2020-08-09,58767,80000.0,Funerals & Memorials,298.0,312.0,False


In [257]:
general.groupby('category')[['total_donations']].sum().reset_index().sort_values('total_donations', ascending=False)

Unnamed: 0,category,total_donations
12,"Medical, Illness & Healing",1715092.0
0,Accidents & Emergencies,1087943.0
11,Funerals & Memorials,794254.0
5,Community & Neighbors,74864.0
1,Animals & Pets,37636.0
15,Other,31732.0
19,Volunteer & Service,28039.0
9,Education & Learning,21974.0
7,"Creative Arts, Music & Film",17047.0
13,"Missions, Faith & Church",9730.0


# K-Means 시도

In [284]:
general = general.query('funds_target_dollar < 4000000')

In [258]:
from sklearn.cluster import KMeans

In [303]:
X = general[['char_age']]
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)

In [304]:
fig = px.scatter(
    general, x = 'char_age', y='funds_target_dollar', hover_data=['title'], color=kmeans.labels_)
fig.show()

In [291]:
X = general[['funds_target_dollar']]
kmeans = KMeans(n_clusters=4, random_state=0).fit(X)

In [292]:
fig = px.scatter(
    general, x = 'funds_raised_dollar', y='funds_target_dollar', hover_data=['title'], color=kmeans.labels_)
fig.show()

In [1]:
fig = px.scatter(
    general, x = 'total_comments', y='funds_target_dollar', hover_data=['title'], color=kmeans.labels_)
fig.show()

NameError: name 'px' is not defined

In [5]:
general_original.shape

(3904, 40)