In [118]:
import pandas as pd

In [119]:
# Relative path to the charity CSV that the next cell loads with pd.read_csv
path = "Resources/charity_data.csv"

In [121]:
# Load the CSV located at `path` into the base DataFrame
df = pd.read_csv(filepath_or_buffer=path)

In [122]:
# Remove the columns the analysis does not use (EIN, NAME); `df` itself is left untouched
df2 = df.drop(labels=["EIN", "NAME"], axis="columns")

In [132]:
# Count the distinct values in every column of df2.
# NOTE(review): this cell's execution count (132) is higher than the binning
# cell's (124), so the stored output already lists `asking_amount_binned`.
# On a fresh top-to-bottom run that column does not exist yet at this point.
df2.nunique(axis="index", dropna=True)

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
asking_amount_binned        12
dtype: int64

In [124]:
# Bin ASK_AMT into labelled ranges and store the result in a new column.
#
# NOTE(review): the last two labels do not match their edges -- 100,000,000 to
# 1,000,000,000 is 100M-1B (not "100M-1T"), and the top bin starts at 1B (not
# 1T). The label strings are kept as-is so existing category values stay
# stable; rename them deliberately if nothing downstream depends on them.
#
# The top edge is infinity so the open-ended last label ("...+") really is
# open-ended; with a finite cap, larger amounts would silently become NaN.
bins = [0, 5000, 10000, 50000, 100000, 500000, 1000000, 2000000, 5000000, 10000000, 100000000, 1000000000, float("inf")]
labels = ["5k", "5k-10k", "10k-50k", "50k-100k", "100k-500k", "500k-1M", "1M-2M", "2M-5M", "5M-10M", "10M-100M", "100M-1T", "1T+"]
df2["asking_amount_binned"] = pd.cut(
    df2["ASK_AMT"],
    bins=bins,
    labels=labels,
    include_lowest=True,  # an ASK_AMT of exactly 0 lands in the first bin instead of NaN
)
df2["asking_amount_binned"].value_counts()

5k           25398
10k-50k       2398
100k-500k     2304
50k-100k      1423
500k-1M        650
5k-10k         549
1M-2M          485
2M-5M          455
10M-100M       311
5M-10M         225
100M-1T         83
1T+             18
Name: asking_amount_binned, dtype: int64

In [125]:
# Group CLASSIFICATION values with fewer than 50 instances as "Other".
#
# The counts table is given explicit column names ("index" = class label,
# "CLASSIFICATION" = its count) so this cell does not depend on how the
# installed pandas version names the columns of value_counts().reset_index().
df3 = df2["CLASSIFICATION"].value_counts()
df3 = df3.rename_axis("index").reset_index(name="CLASSIFICATION")
df3 = df3[df3["CLASSIFICATION"] < 50]
form_names = list(df3["index"])

# Recode only the CLASSIFICATION column. A frame-wide replace would also
# rewrite any equal value that happens to occur in another column.
is_rare_class = df2["CLASSIFICATION"].isin(form_names)
df4 = df2.assign(CLASSIFICATION=df2["CLASSIFICATION"].mask(is_rare_class, "Other"))
df4["CLASSIFICATION"].value_counts()

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
Other      391
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
C2800       95
C7100       75
C1300       58
C1280       50
Name: CLASSIFICATION, dtype: int64

In [126]:
# Group APPLICATION_TYPE values with fewer than 100 instances as "Other".
#
# The counts table is given explicit column names ("index" = application type,
# "APPLICATION_TYPE" = its count) so this cell does not depend on how the
# installed pandas version names the columns of value_counts().reset_index().
df5 = df2["APPLICATION_TYPE"].value_counts()
df5 = df5.rename_axis("index").reset_index(name="APPLICATION_TYPE")
df5 = df5[df5["APPLICATION_TYPE"] < 100]
app_names = list(df5["index"])

# Recode only the APPLICATION_TYPE column of df4. A frame-wide replace would
# also rewrite any equal value that happens to occur in another column.
is_rare_app = df4["APPLICATION_TYPE"].isin(app_names)
df6 = df4.assign(APPLICATION_TYPE=df4["APPLICATION_TYPE"].mask(is_rare_app, "Other"))
df6["APPLICATION_TYPE"].value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
T9         156
Other      120
Name: APPLICATION_TYPE, dtype: int64

In [133]:
# Drop the raw ASK_AMT column (already represented by asking_amount_binned)
# and verify the number of unique values for all remaining columns.
# errors="ignore" keeps the cell re-runnable: df6 is reassigned from itself,
# so a second run would otherwise raise KeyError for the already-dropped column.
df6 = df6.drop(columns=["ASK_AMT"], errors="ignore")
df6.nunique()

APPLICATION_TYPE          10
AFFILIATION                6
CLASSIFICATION            16
USE_CASE                   5
ORGANIZATION               4
STATUS                     2
INCOME_AMT                 9
SPECIAL_CONSIDERATIONS     2
IS_SUCCESSFUL              2
asking_amount_binned      12
dtype: int64