# Philanthropy Regressions
### Finnian Lowden

In [13]:
### Data dictionaries


# donations_df data dictionary
# Variable                    Type       Description
# Grantmaker_name             String     Corporation/foundation that gave grant
# Year                        Int        Year grant was given
# Recipient_name              String     Organization that recived grant
# NTEE_code                   String     NTEE code of organization given grant
# NTEE_category               String     Broader category of organization according to IRS
# Grant Amount                Float      Grant amount adjusted for inflation to 2020 dollars
# Recipient_city              String     City of recipient organization
# Recipient_state             String     State of recipient organization


# text_df data dictionary
# Variable                    Type       Description
# Group                       String     Name of environmental nonprofit
# Individualism               Float      Measure of prevalence of this discourse of delay (DoD) in the text
# The 'free rider' excuse     Float      Measure of prevalence of this DoD in the text in given year
# Whataboutism                Float      Measure of prevalence of this DoD in the text in given year
# All talk, little action     Float      Measure of prevalence of this DoD in the text in given year
# Fossil fuel solutionism     Float      Measure of prevalence of this DoD in the text in given year
# No sticks, just carrots     Float      Measure of prevalence of this DoD in the text in given year
# Technological optimism      Float      Measure of prevalence of this DoD in the text in given year
# Appeal to well-being        Float      Measure of prevalence of this DoD in the text in given year
# Policy perfectionism        Float      Measure of prevalence of this DoD in the text in given year
# Appeal to social justice    Float      Measure of prevalence of this DoD in the text in given year
# Change is impossible        Float      Measure of prevalence of this DoD in the text in given year
# Doomism                     Float      Measure of prevalence of this DoD in the text in given year
# Year                        Int        Year associated with prevalence measure
# AF_indc                     Int        Indicator variable for American Forests (AF)
# NFWF_indc                   Int        Indicator variable for the National Fish and Wildlife Foundation (NFWF)
# NRDC_indc                   Int        Indicator variable for the Natural Resources Defense Council (NRDC)
# CI_indc                     Int        Indicator variable for Conservation International(CI)
# WWF_indc                    Int        Indicator variable for the World Wildlife Fund (WWF)
# SC_indc                     Int        Indicator variable for the Sierra Club (SC)
# OC_indc                     Int        Indicator variable for The Ocean Conservancy (OC)
# EDF_indc                    Int        Indicator variable for the Environmental Defense Fund (EDF)
# NAS_indc                    Int        Indicator variable for the National Audubon Society (NAS)

In [65]:
### Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.preprocessing import normalize
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

import csv
import nltk
from nltk.corpus import stopwords # Importing stop words (e.g., the, and, a, of, etc.)
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/finnianlowden/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
### Importing data

# Corporate giving dataset
# The dataset is large, so it takes a little while
complete_donations_df = pd.read_excel("Oil_corporations_NTEE_Data_MASTER_SHEET.xlsx", sheet_name = "Individual_donations")

# Text analysis results dataset
complete_text_df = pd.read_excel("DoD_results.xlsx")


In [217]:
### Formatting dataframes

# Working with corporate philanthropy data
# Dropping irrelevant columns (those not in data dictionary)
donations_df = complete_donations_df[["grantmaker_name", "year", "recipient_name", "NTEE_code",
                                      "NTEE_category", "Grant Amount (2020 Dollars)",
                                      "recipient_city", "recipient_state"]]

# Renaming Grant Amount (2020 Dollars) to not include spaces & converting to int
donations_df = donations_df.rename(columns = {"Grant Amount (2020 Dollars)": "grant_amount"})
donations_df["grant_amount"] = donations_df["grant_amount"]

# Making copy of complete_text_df
text_df = complete_text_df.copy()

# Checking to make sure changes were made
# donations_df.head()
# text_df.head()

In [218]:
### Adding corporate giving amount for each nonprofit to text_df
reduce_donations_df = donations_df.copy()
group_list = ['nature conservancy', 'american forests', 'national fish and wildlife foundation',
 'natural resources defense council', 'conservation international', 'world wildlife fund',
 'sierra club', 'ocean conservancy', 'environmental defense fund', 'audubon society']
reduce_donations_df["recipient_name"] = reduce_donations_df["recipient_name"].str.lower()
boolean_series = reduce_donations_df["recipient_name"].isin(group_list)
reduce_donations_df = reduce_donations_df[boolean_series]

# Grouping by year and group
annualized_donations_df = reduce_donations_df.groupby(
    ['recipient_name', 'year'], as_index = False).agg({'grant_amount': sum})
annualized_donations_df = pd.DataFrame(annualized_donations_df)
# annualized_donations_df.to_excel("Output.xlsx", index = False) # code to download as XSLX

# Adding corporate giving to regression_df
annualized_donations_df = annualized_donations_df.rename(
    columns = {"recipient_name": "Group", "year": "Year"}) # Renaming group column
text_df["Group"] = text_df["Group"].str.lower()
regression_df = pd.merge(text_df, annualized_donations_df, on = ['Group', 'Year'], how = 'outer')
regression_df["grant_amount"] = regression_df["grant_amount"].fillna(999)


In [219]:
### Formatting control data in regression_df
control_prevalence_df = text_df.copy()
control_list = ['greenpeace', 'earthjustice']
control_prevalence_df["Group"] = control_prevalence_df["Group"].str.lower()
boolean_series = control_prevalence_df["Group"].isin(control_list)
control_prevalence_df = control_prevalence_df[boolean_series]

# Grouping prevalence by year
annualized_control_prevalence_df = control_prevalence_df.groupby(
    ['Year'], as_index = False).agg({"Individualism": 'mean', "The 'free rider' excuse": 'mean',
                                     "Whataboutism": 'mean', "All talk, little action": 'mean',
                                     "Fossil fuel solutionism": 'mean', "No sticks, just carrots": 'mean',
                                     "Technological optimism": 'mean', "Appeal to well-being": 'mean',
                                     "Policy perfectionism": 'mean', "Appeal to social justice": 'mean',
                                     "Change is impossible": 'mean', "Doomism": 'mean'})
annualized_control_prevalence_df = pd.DataFrame(annualized_control_prevalence_df)
annualized_control_prevalence_df = annualized_control_prevalence_df.rename(
    columns={"Individualism": 'Individualism_control', "The 'free rider' excuse": 'Free_rider_control',
             "Whataboutism": 'Whataboutism_control', "All talk, little action": 'Talk_no_action_control',
             "Fossil fuel solutionism": 'FF_solutionism_control', "No sticks, just carrots": 'Carrots_control',
             "Technological optimism": 'Tech_optimism_control', "Appeal to well-being": 'Well_being_control',
             "Policy perfectionism": 'Perfect_policy_control', "Appeal to social justice": 'Social_justice_control',
             "Change is impossible": 'Change_impossible_control', "Doomism": 'Doomism_control'})

# Adding control data to regression_df
annualized_control_prevalence_df = annualized_control_prevalence_df.fillna(999)
regression_df = pd.merge(regression_df, annualized_control_prevalence_df, on = ['Year'], how = 'outer')

# Checking to make sure changes were made
# regression_df.head()

In [220]:
### Adding indicator variables for each group
# text_df['TNC'] = np.where(text_df['Group'] == 'Nature Conservancy', 1, 0) # Not including to avoid perfect multico
regression_df['AF_indc'] = np.where(regression_df['Group'] == 'american forests', 1, 0)
regression_df['NFWF_indc'] = np.where(regression_df['Group'] == 'national fish and wildlife foundation', 1, 0)
regression_df['NRDC_indc'] = np.where(regression_df['Group'] == 'natural resources defense council', 1, 0)
regression_df['CI_indc'] = np.where(regression_df['Group'] == 'conservation international', 1, 0)
regression_df['WWF_indc'] = np.where(regression_df['Group'] == 'world wildlife fund', 1, 0)
regression_df['SC_indc'] = np.where(regression_df['Group'] == 'sierra club', 1, 0)
regression_df['OC_indc'] = np.where(regression_df['Group'] == 'ocean conservancy', 1, 0)
regression_df['EDF_indc'] = np.where(regression_df['Group'] == 'environmental defense fund', 1, 0)
regression_df['NAS_indc'] = np.where(regression_df['Group'] == 'audubon society', 1, 0)

# Checking to make sure changes were made
# regression_df.head()

In [222]:
### Renaming columns in regression_df
regression_df = regression_df.rename(columns={"The 'free rider' excuse": 'Free_rider',
             "All talk, little action": 'Talk_no_action', "Fossil fuel solutionism": 'FF_solutionism',
             "No sticks, just carrots": 'Carrots', "Technological optimism": 'Tech_optimism',
             "Appeal to well-being": 'Well_being', "Policy perfectionism": 'Perfect_policy',
             "Appeal to social justice": 'Social_justice', "Change is impossible": 'Change_impossible'})
                                              
# Checking to make sure changes were made
# regression_df.head()

In [242]:
### Regression work
y_list = ['Individualism', 'Free_rider', 'Whataboutism', 'Talk_no_action', 'FF_solutionism',
'Carrots', 'Tech_optimism', 'Well_being', 'Perfect_policy', 'Social_justice',
'Change_impossible', 'Doomism']

control_list = [x + "_control" for x in y_list]

current_y = 'Individualism' # Select whatever Y I want to see (e.g., Individualism prevalence)
# Selecting y and dropping irrelevant ys and controls
current_control = current_y + "_control"
y_list.remove(current_y)
control_list.remove(current_control)
y_list = y_list + control_list

# Creating X and Y data from regression_df
y = regression_df[current_y]
X = regression_df.drop(columns = y_list, axis=1)

# Making sure all non-numeric columns and NaN values have been dropped
# X.replace([np.inf, -np.inf], np.nan, inplace = True)
# X.dropna(inplace = True)

# Running OLS regressions
olsReg = sm.OLS(y, X).fit()
print(olsReg.summary())

Unnamed: 0,Group,Individualism,Year,grant_amount,Individualism_control,AF_indc,NFWF_indc,NRDC_indc,CI_indc,WWF_indc,SC_indc,OC_indc,EDF_indc,NAS_indc


In [9]:
# Printing resuls from LASSO reg
coef_comp = pd.DataFrame({'var': X.columns, 'val_ols': olsReg.params.tolist()})
print(coef_comp)


              var       val_ols
0   Individualism  1.000000e+00
1            Year  8.673617e-17
2         AF_indc  8.881784e-14
3       NFWF_indc  7.993606e-14
4       NRDC_indc  8.171241e-14
5         CI_indc  6.394885e-14
6        WWF_indc  1.101341e-13
7         SC_indc -6.394885e-14
8         OC_indc  4.263256e-14
9        EDF_indc  4.263256e-14
10       NAS_indc  8.526513e-14
