# Local news publications
Data from [Project News Oasis](https://www.projectnewsoasis.com/publications)

In [1]:
import pandas as pd



In [2]:
df = pd.read_csv('data/publications.csv')
df.columns = df.columns.str.replace(' ', '_').str.replace('(', '', regex=False) \
                        .str.replace(')', '', regex=False).str.replace('-', '', regex=False) \
                        .str.lower()
df.columns

Index(['publication_name', 'parent_publication', 'url', 'owner',
       'is_owner_founder', 'city', 'state', 'country', 'primary_language',
       'primary_language_other', 'tax_status_founded', 'tax_status_current',
       'year_founded', 'total_employees', 'budget_%__editorial',
       'budget_%__revenue_generation', 'budget_%__product/technology',
       'budget_%__administration', 'products', 'products_other',
       'distribution', 'distribution_method_other', 'geographic_area',
       'core_editorial_strategy_characteristics',
       'core_editorial_strategy_characteristics_other', 'coverage_topics',
       'coverage_topics_other', 'underrepresented_communities',
       'underrepresented_communities_not_listed', 'revenue_streams',
       'revenue_stream_other', 'revenue_stream__additional_info',
       'revenue_stream__largest', 'revenue_streams_largest_other',
       'paywall_or_gateway', 'paywall_or_gateway_other',
       'advertising_products', 'advertising_product_other',
   

In [3]:
# filter out canada
df = df[df.country!='Canada']

In [4]:
df.tax_status_founded.value_counts()

LLC                                        115
Sole Proprietor/no specific tax status      64
Nonprofit 501c(3) or Canadian nonprofit     33
Under umbrella of a 501c(3)                 25
S Corp                                      20
For Profit                                  19
Public-benefit corporation                   1
Partnership                                  1
Name: tax_status_founded, dtype: int64

In [5]:
df[df.year_founded>=2000].tax_status_founded.value_counts()

LLC                                        111
Sole Proprietor/no specific tax status      58
Nonprofit 501c(3) or Canadian nonprofit     31
Under umbrella of a 501c(3)                 25
S Corp                                      15
For Profit                                  15
Public-benefit corporation                   1
Partnership                                  1
Name: tax_status_founded, dtype: int64

## Number of publications with each tax status over time

In [6]:
# create data frame counting the number of publications with each tax status over time
df_count = df.groupby('year_founded').tax_status_founded.value_counts().reset_index(name = 'count')
df_count.tail()

Unnamed: 0,year_founded,tax_status_founded,count
96,2020.0,LLC,6
97,2020.0,Under umbrella of a 501c(3),4
98,2020.0,For Profit,2
99,2020.0,S Corp,2
100,2020.0,Sole Proprietor/no specific tax status,2


In [7]:
# filter above dataframe to just number of publications after 2000
df_count_00=df_count[df_count.year_founded>=2000]

In [8]:
df_count_00.to_csv('data/publication-tax-status.csv', index=False)

## How number of publications that are a 501c(3) or under a 501c(3) umbrella have changed 

In [9]:
# look at just under umbrella of a 501c(3)
df_umbrella = df_count[df_count.tax_status_founded == 'Under umbrella of a 501c(3)']
df_umbrella

Unnamed: 0,year_founded,tax_status_founded,count
45,2009.0,Under umbrella of a 501c(3),2
51,2010.0,Under umbrella of a 501c(3),2
57,2011.0,Under umbrella of a 501c(3),2
61,2012.0,Under umbrella of a 501c(3),2
65,2013.0,Under umbrella of a 501c(3),1
72,2015.0,Under umbrella of a 501c(3),1
75,2016.0,Under umbrella of a 501c(3),2
82,2017.0,Under umbrella of a 501c(3),1
85,2018.0,Under umbrella of a 501c(3),6
93,2019.0,Under umbrella of a 501c(3),2


In [10]:
# look at just 501c(3) -- NOTE: no nonprofits were recorded as being founded in 2020
df_nonprofit = df_count[df_count.tax_status_founded == 'Nonprofit 501c(3) or Canadian nonprofit']
df_nonprofit

Unnamed: 0,year_founded,tax_status_founded,count
0,1976.0,Nonprofit 501c(3) or Canadian nonprofit,1
26,2005.0,Nonprofit 501c(3) or Canadian nonprofit,2
33,2006.0,Nonprofit 501c(3) or Canadian nonprofit,1
43,2009.0,Nonprofit 501c(3) or Canadian nonprofit,4
47,2010.0,Nonprofit 501c(3) or Canadian nonprofit,4
56,2011.0,Nonprofit 501c(3) or Canadian nonprofit,2
60,2012.0,Nonprofit 501c(3) or Canadian nonprofit,3
67,2014.0,Nonprofit 501c(3) or Canadian nonprofit,4
70,2015.0,Nonprofit 501c(3) or Canadian nonprofit,5
76,2016.0,Nonprofit 501c(3) or Canadian nonprofit,1


In [11]:
# combine dataframes
df_merge = df_umbrella.merge(df_nonprofit, on='year_founded', suffixes=('_umbrella', '_nonprof'))
df_merge

Unnamed: 0,year_founded,tax_status_founded_umbrella,count_umbrella,tax_status_founded_nonprof,count_nonprof
0,2009.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,4
1,2010.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,4
2,2011.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,2
3,2012.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,3
4,2015.0,Under umbrella of a 501c(3),1,Nonprofit 501c(3) or Canadian nonprofit,5
5,2016.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,1
6,2018.0,Under umbrella of a 501c(3),6,Nonprofit 501c(3) or Canadian nonprofit,3
7,2019.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,2


In [12]:
# percent of total nonprofits founded that were under the umbrella of a 501c(3), as opposed to a standalone nonprofit
df_merge['umbrella_pct'] = df_merge.count_umbrella/(df_merge.count_umbrella+df_merge.count_nonprof)
df_merge

Unnamed: 0,year_founded,tax_status_founded_umbrella,count_umbrella,tax_status_founded_nonprof,count_nonprof,umbrella_pct
0,2009.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,4,0.333333
1,2010.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,4,0.333333
2,2011.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,2,0.5
3,2012.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,3,0.4
4,2015.0,Under umbrella of a 501c(3),1,Nonprofit 501c(3) or Canadian nonprofit,5,0.166667
5,2016.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,1,0.666667
6,2018.0,Under umbrella of a 501c(3),6,Nonprofit 501c(3) or Canadian nonprofit,3,0.666667
7,2019.0,Under umbrella of a 501c(3),2,Nonprofit 501c(3) or Canadian nonprofit,2,0.5
