# Philanthropy Regressions
### Finnian Lowden

In [1]:
### Data dictionary

# Variable               Type       Description
# Grantmaker_name        String     Corporation/foundation that gave grant
# Year                   Float      Year grant was given
# Recipient_name         String     Organization that received grant
# NTEE_code              String     NTEE code of organization given grant
# NTEE_category          String     Broader category of organization according to IRS
# Grant Amount           Float      Grant amount adjusted for inflation to 2020 dollars
# Recipient_city         String     City of recipient organization
# Recipient_state        String     State of recipient organization

In [2]:
### Imports

import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.preprocessing import normalize
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

import nltk
from nltk.corpus import stopwords # Importing stop words (e.g., the, and, a, of, etc.)
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /Users/finn/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
### Importing data

# Workbook locations. Each can be overridden with an environment variable so the
# notebook can be re-run on a machine other than the author's without editing
# code; when a variable is unset the original location is used unchanged.
#   DONATIONS_XLSX   -> corporate giving workbook
#   DOD_RESULTS_XLSX -> text analysis (dictionary methods) results workbook
donations_path = os.environ.get(
    "DONATIONS_XLSX",
    "Oil_corporations_NTEE_Data_MASTER_SHEET.xlsx",
)
path = os.environ.get(
    "DOD_RESULTS_XLSX",
    "/Users/finn/Documents/GitHub/environmental_text_analysis/Dictionary_methods/DoD_results.xlsx",
)

# Corporate giving dataset
# The dataset is large, so it takes a little while
complete_donations_df = pd.read_excel(donations_path, sheet_name="Individual_donations")

# Text analysis results dataset
complete_text_df = pd.read_excel(path)


In [11]:
### Formatting dataframes

# --- Corporate philanthropy data ---
# Keep only the columns listed in the data dictionary, and give the grant amount
# column a name without spaces.
kept_columns = [
    "grantmaker_name",
    "year",
    "recipient_name",
    "NTEE_code",
    "NTEE_category",
    "Grant Amount (2020 Dollars)",
    "recipient_city",
    "recipient_state",
]
donations_df = (
    complete_donations_df[kept_columns]
    .rename(columns={"Grant Amount (2020 Dollars)": "grant_amount"})
)

# Optional check that the changes were made:
# donations_df.head()

# --- Text analysis data ---
# Work on a copy so the frame loaded from disk is left untouched.
text_df = complete_text_df.copy()

# Indicator column name -> value of 'Group' that switches it on.
# 'Nature Conservancy' is deliberately left out (no 'TNC' column) so the set of
# indicators is not perfectly multicollinear.
indicator_groups = {
    'AF_indc': 'American Forests',
    'NFWF_indc': 'National Fish and Wildlife Foundation',
    'NRDC_indc': 'Natural Resources Defense Council',
    'CI_indc': 'Conservation International',
    'WWF_indc': 'World Wildlife Fund',
    'SC_indc': 'Sierra Club',
    'OC_indc': 'Ocean Conservancy',
    'EDF_indc': 'Environmental Defense Fund',
    'NAS_indc': 'Audubon Society',
}

# Add one 0/1 column per group, in the order listed above.
for indicator_col, group_name in indicator_groups.items():
    text_df[indicator_col] = np.where(text_df['Group'] == group_name, 1, 0)

# Optional check that the changes were made:
# text_df.head()

Unnamed: 0,Group,Individualism,The 'free rider' excuse,Whataboutism,"All talk, little action",Fossil fuel solutionism,"No sticks, just carrots",Technological optimism,Appeal to well-being,Policy perfectionism,...,Year,AF_indc,NFWF_indc,NRDC_indc,CI_indc,WWF_indc,SC_indc,OC_indc,EDF_indc,NAS_indc
0,Nature Conservancy,0.004436,0.0,0.00038,0.00076,0.000127,0.000887,0.000507,0.000253,0.000253,...,1980,0,0,0,0,0,0,0,0,0
1,Nature Conservancy,0.003941,0.0,0.000131,0.000263,0.000131,0.000131,0.000525,0.000131,0.0,...,1981,0,0,0,0,0,0,0,0,0
2,Nature Conservancy,0.001929,0.0,0.000772,0.000643,0.000129,0.000643,0.000257,0.000257,0.0,...,1982,0,0,0,0,0,0,0,0,0
3,Nature Conservancy,0.002117,0.0,0.000235,0.000118,0.000118,0.0,0.0,0.000118,0.0,...,1983,0,0,0,0,0,0,0,0,0
4,Nature Conservancy,0.005676,0.000196,0.0,0.000489,9.8e-05,0.000294,0.0,9.8e-05,0.000294,...,1984,0,0,0,0,0,0,0,0,0
