# Philanthropy Regressions
### Finnian Lowden

In [13]:
### Data dictionaries


# donations_df data dictionary
# Variable                    Type       Description
# Grantmaker_name             String     Corporation/foundation that gave grant
# Year                        Int        Year grant was given
# Recipient_name              String     Organization that recived grant
# NTEE_code                   String     NTEE code of organization given grant
# NTEE_category               String     Broader category of organization according to IRS
# Grant Amount                Float      Grant amount adjusted for inflation to 2020 dollars
# Recipient_city              String     City of recipient organization
# Recipient_state             String     State of recipient organization


# text_df data dictionary
# Variable                    Type       Description
# Group                       String     Name of environmental nonprofit
# Individualism               Float      Measure of prevalence of this discourse of delay (DoD) in the text
# The 'free rider' excuse     Float      Measure of prevalence of this DoD in the text in given year
# Whataboutism                Float      Measure of prevalence of this DoD in the text in given year
# All talk, little action     Float      Measure of prevalence of this DoD in the text in given year
# Fossil fuel solutionism     Float      Measure of prevalence of this DoD in the text in given year
# No sticks, just carrots     Float      Measure of prevalence of this DoD in the text in given year
# Technological optimism      Float      Measure of prevalence of this DoD in the text in given year
# Appeal to well-being        Float      Measure of prevalence of this DoD in the text in given year
# Policy perfectionism        Float      Measure of prevalence of this DoD in the text in given year
# Appeal to social justice    Float      Measure of prevalence of this DoD in the text in given year
# Change is impossible        Float      Measure of prevalence of this DoD in the text in given year
# Doomism                     Float      Measure of prevalence of this DoD in the text in given year
# Year                        Int        Year associated with prevalence measure
# AF_indc                     Int        Indicator variable for American Forests (AF)
# NFWF_indc                   Int        Indicator variable for the National Fish and Wildlife Foundation (NFWF)
# NRDC_indc                   Int        Indicator variable for the Natural Resources Defense Council (NRDC)
# CI_indc                     Int        Indicator variable for Conservation International(CI)
# WWF_indc                    Int        Indicator variable for the World Wildlife Fund (WWF)
# SC_indc                     Int        Indicator variable for the Sierra Club (SC)
# OC_indc                     Int        Indicator variable for The Ocean Conservancy (OC)
# EDF_indc                    Int        Indicator variable for the Environmental Defense Fund (EDF)
# NAS_indc                    Int        Indicator variable for the National Audubon Society (NAS)

In [14]:
### Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.preprocessing import normalize
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

import nltk
from nltk.corpus import stopwords # Importing stop words (e.g., the, and, a, of, etc.)
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /Users/finn/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [4]:
### Importing data

# Corporate giving dataset
# The dataset is large, so it takes a little while
complete_donations_df = pd.read_excel("Oil_corporations_NTEE_Data_MASTER_SHEET.xlsx", sheet_name = "Individual_donations")

# Text analysis results dataset
complete_text_df = pd.read_excel("DoD_results.xlsx")


In [12]:
### Formatting dataframes

# Working with corporate philanthropy data
# Dropping irrelevant columns (those not in data dictionary)
donations_df = complete_donations_df[["grantmaker_name", "year", "recipient_name", "NTEE_code",
                                      "NTEE_category", "Grant Amount (2020 Dollars)",
                                      "recipient_city", "recipient_state"]]

# Renaming Grant Amount (2020 Dollars) to not include spaces
donations_df = donations_df.rename(columns = {"Grant Amount (2020 Dollars)": "grant_amount"})
        
# Checking to make sure changes were made
# donations_df.head()

# Working with text analysis dataframe
text_df = complete_text_df.copy()

# Adding indicator variables for each group
# text_df['TNC'] = np.where(text_df['Group'] == 'Nature Conservancy', 1, 0) # Not including to avoid perfect multico
text_df['AF_indc'] = np.where(text_df['Group'] == 'American Forests', 1, 0)
text_df['NFWF_indc'] = np.where(text_df['Group'] == 'National Fish and Wildlife Foundation', 1, 0)
text_df['NRDC_indc'] = np.where(text_df['Group'] == 'Natural Resources Defense Council', 1, 0)
text_df['CI_indc'] = np.where(text_df['Group'] == 'Conservation International', 1, 0)
text_df['WWF_indc'] = np.where(text_df['Group'] == 'World Wildlife Fund', 1, 0)
text_df['SC_indc'] = np.where(text_df['Group'] == 'Sierra Club', 1, 0)
text_df['OC_indc'] = np.where(text_df['Group'] == 'Ocean Conservancy', 1, 0)
text_df['EDF_indc'] = np.where(text_df['Group'] == 'Environmental Defense Fund', 1, 0)
text_df['NAS_indc'] = np.where(text_df['Group'] == 'Audubon Society', 1, 0)

# Checking to make sure changes were made
text_df.head()

Unnamed: 0,Group,Individualism,The 'free rider' excuse,Whataboutism,"All talk, little action",Fossil fuel solutionism,"No sticks, just carrots",Technological optimism,Appeal to well-being,Policy perfectionism,...,Year,AF_indc,NFWF_indc,NRDC_indc,CI_indc,WWF_indc,SC_indc,OC_indc,EDF_indc,NAS_indc
0,Nature Conservancy,0.004436,0.0,0.00038,0.00076,0.000127,0.000887,0.000507,0.000253,0.000253,...,1980,0,0,0,0,0,0,0,0,0
1,Nature Conservancy,0.003941,0.0,0.000131,0.000263,0.000131,0.000131,0.000525,0.000131,0.0,...,1981,0,0,0,0,0,0,0,0,0
2,Nature Conservancy,0.001929,0.0,0.000772,0.000643,0.000129,0.000643,0.000257,0.000257,0.0,...,1982,0,0,0,0,0,0,0,0,0
3,Nature Conservancy,0.002117,0.0,0.000235,0.000118,0.000118,0.0,0.0,0.000118,0.0,...,1983,0,0,0,0,0,0,0,0,0
4,Nature Conservancy,0.005676,0.000196,0.0,0.000489,9.8e-05,0.000294,0.0,9.8e-05,0.000294,...,1984,0,0,0,0,0,0,0,0,0


In [6]:
### Adding corporate giving amount for each nonprofit to text_df



In [7]:
### Formatting control data in text_df



In [8]:
### Regression work
y_list = ['Individualism', "The 'free rider' excuse", 'Whataboutism', 'All talk, little action',
                                         'Fossil fuel solutionism', 'No sticks, just carrots',
                                         'Technological optimism', 'Appeal to well-being',
                                         'Policy perfectionism', 'Appeal to social justice',
                                         'Change is impossible', 'Doomism', 'Group']

# Creating X and Y data text_df
current_y = 'Individualism' # Select whatever Y I want to see (e.g., Individualism prevalence)
y_list.remove(current_y)
y = text_df[current_y]
X = text_df.drop(columns = y_list)

# Making sure all non-numeric columns and NaN values have been dropped
X.replace([np.inf, -np.inf], np.nan, inplace = True)
X.dropna(inplace = True)

# Running OLS regressions
olsReg = sm.OLS(y, X).fit()
print(olsReg.summary())

                                 OLS Regression Results                                
Dep. Variable:          Individualism   R-squared (uncentered):                   1.000
Model:                            OLS   Adj. R-squared (uncentered):              1.000
Method:                 Least Squares   F-statistic:                          2.032e+32
Date:                Sun, 06 Mar 2022   Prob (F-statistic):                        0.00
Time:                        11:16:49   Log-Likelihood:                          11311.
No. Observations:                 410   AIC:                                 -2.260e+04
Df Residuals:                     399   BIC:                                 -2.256e+04
Df Model:                          11                                                  
Covariance Type:            nonrobust                                                  
                    coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------

In [9]:
# Printing resuls from LASSO reg
coef_comp = pd.DataFrame({'var': X.columns, 'val_ols': olsReg.params.tolist()})
print(coef_comp)


              var       val_ols
0   Individualism  1.000000e+00
1            Year  8.673617e-17
2         AF_indc  8.881784e-14
3       NFWF_indc  7.993606e-14
4       NRDC_indc  8.171241e-14
5         CI_indc  6.394885e-14
6        WWF_indc  1.101341e-13
7         SC_indc -6.394885e-14
8         OC_indc  4.263256e-14
9        EDF_indc  4.263256e-14
10       NAS_indc  8.526513e-14
