## API Key

In [1]:
# Read the NYT API key from the NYT_API_KEY environment variable so the secret
# is never stored in (or committed with) the notebook. The '...' placeholder is
# kept as the fallback so the cell still runs when the variable is not set.
import os

api_key = os.environ.get('NYT_API_KEY', '...')

## Imports

In [2]:
import os
import sys
import pandas as pd
import numpy as np
import json
import time
import datetime
import requests
import pickle

## Get Articles through API

In [3]:
def _fetch_month_count(url, params, max_retries, retry_delay, timeout):
    """Return how many docs one Archive API month holds, retrying failed requests.

    Raises RuntimeError once `max_retries` retries have failed, instead of
    looping forever on a permanent error such as a rejected API key.
    """
    failure = None
    for attempt in range(max_retries + 1):
        if attempt:
            print('trying again...')
            time.sleep(retry_delay)
        try:
            # The timeout keeps a stalled connection from hanging the whole crawl.
            response = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException as err:
            failure = repr(err)
            print('request failed: {}'.format(failure))
            continue
        if attempt:
            print('status code: {}'.format(response.status_code))
        if response.status_code == 200:
            return len(response.json()['response']['docs'])
        failure = 'status code {}'.format(response.status_code)
    raise RuntimeError(
        'giving up on {} after {} retries ({})'.format(url, max_retries, failure)
    )


def get_all_articles(articles, start_year=1981, end_year=2018, end_month=2,
                     export_path='archive_exports/archive_export.json',
                     max_retries=20, retry_delay=3, timeout=60):
    """Fill `articles` with the number of NYT archive docs per year and month.

    One Archive API request is made per month from January of `start_year`
    through `end_month` of `end_year`. The result is stored in place as
    ``articles[str(year)][str(month)] = <number of docs>`` and the whole dict
    is written to `export_path` as JSON after every completed year, so an
    interrupted run keeps the years already fetched.

    Parameters
    ----------
    articles : dict
        Mutated in place; existing entries for the fetched years are replaced.
    start_year, end_year : int
        Inclusive range of years to fetch.
    end_month : int
        Last month (1-12) fetched for `end_year`; earlier years use all 12.
    export_path : str
        JSON file that receives the progress dumps and the final export.
    max_retries : int
        Retries allowed per month after the first failed request.
    retry_delay : float
        Seconds to wait before each retry.
    timeout : float
        Per-request timeout in seconds.

    Returns
    -------
    dict
        The same `articles` object, for convenience.

    Raises
    ------
    RuntimeError
        If one month still fails after `max_retries` retries.
    """
    # Create the export directory up front; otherwise the first dump fails
    # only after a full year of requests has already been spent.
    export_dir = os.path.dirname(export_path)
    if export_dir:
        os.makedirs(export_dir, exist_ok=True)

    params = {'api-key': api_key}

    for year_num in range(start_year, end_year + 1):
        year = str(year_num)
        articles[year] = {}
        # Only the final year is cut short; every other year has 12 months.
        final_month = end_month if year_num == end_year else 12
        for month_num in range(1, final_month + 1):
            month = str(month_num)
            print('Getting {}/{} articles'.format(year, month))
            url = 'https://api.nytimes.com/svc/archive/v1/{}/{}.json'.format(year, month)
            articles[year][month] = _fetch_month_count(
                url, params, max_retries, retry_delay, timeout
            )

        print('Dumping year {}.'.format(year))

        with open(export_path, 'w') as fp:
            json.dump(articles, fp)

    print('Exporting articles {}_1-{}_{} to json'.format(start_year, end_year, end_month))
    with open(export_path, 'w') as fp:
        json.dump(articles, fp)

    return articles

## Get Counts per year

In [4]:
# `articles` must exist before it can be pickled, and nothing above this cell
# creates it. Reuse the cached counts when a previous run already saved them;
# otherwise crawl the Archive API (one request per month, so this is slow) and
# cache the result. Delete the pickle to force a fresh crawl.
articles_pickle_path = 'archive_exports/total_articles_dict.pickle'

if os.path.exists(articles_pickle_path):
    with open(articles_pickle_path, 'rb') as file:
        articles = pickle.load(file)
else:
    articles = {}
    get_all_articles(articles)
    with open(articles_pickle_path, 'wb') as file:
        pickle.dump(articles, file)

In [5]:
# Optional: uncomment to reload the previously pickled `articles` dict
# instead of querying the API again.
# with open('archive_exports/total_articles_dict.pickle','rb') as file:
#     articles = pickle.load(file)

In [6]:
# Outer keys of `articles` (years) become columns, inner keys (months) become rows.
df = pd.DataFrame.from_dict(articles, orient='columns')

In [7]:
# Transpose to get desired layout: one row per year, one column per month.
# Rebuilt from `articles` rather than from `df` itself so that re-running this
# cell always gives the same orientation instead of flipping it back.
df = pd.DataFrame(articles).transpose()

In [8]:
# Get Totals Column: sum of the monthly counts for each year.
# Any existing 'total' column is left out of the sum, so re-running this cell
# does not add the previous total on top of the months.
df['total'] = df.drop(columns='total', errors='ignore').sum(axis=1)

In [9]:
# Display the per-month counts for each year together with the 'total' column.
df

Unnamed: 0,1,10,11,12,2,3,4,5,6,7,8,9,total
1981,7204.0,8965.0,8539.0,7770.0,6266.0,7094.0,7136.0,8031.0,7722.0,7304.0,7476.0,7512.0,91019.0
1982,8286.0,9033.0,8122.0,7325.0,7672.0,7239.0,7896.0,6702.0,7012.0,7552.0,7934.0,7154.0,91927.0
1983,7537.0,9136.0,8548.0,7476.0,7499.0,7994.0,9369.0,8573.0,7160.0,8733.0,8018.0,7160.0,97203.0
1984,8371.0,9356.0,8686.0,7643.0,8457.0,8246.0,9305.0,9168.0,7512.0,9075.0,8883.0,7571.0,102273.0
1985,8004.0,9397.0,8917.0,8409.0,7698.0,8241.0,8561.0,8992.0,7891.0,8304.0,8561.0,7953.0,100928.0
1986,8702.0,9751.0,9181.0,7949.0,8347.0,8556.0,9518.0,9381.0,8044.0,8670.0,8973.0,7987.0,105059.0
1987,8674.0,8940.0,9352.0,7784.0,8175.0,8323.0,9050.0,9364.0,7503.0,8051.0,8493.0,7806.0,101515.0
1988,8386.0,9086.0,9007.0,7401.0,8335.0,8381.0,7980.0,9092.0,7476.0,8409.0,8582.0,7814.0,99949.0
1989,8243.0,8826.0,8582.0,7546.0,7785.0,8469.0,9174.0,8792.0,7312.0,8060.0,8042.0,6999.0,97830.0
1990,8378.0,8158.0,7499.0,6603.0,8037.0,8392.0,9151.0,8422.0,6589.0,7653.0,7405.0,6829.0,93116.0


In [10]:
# Persist the year-by-month table (including the 'total' column).
per_month_pickle = 'archive_exports/total_articles_per_month.pickle'
with open(per_month_pickle, 'wb') as out_file:
    pickle.dump(df, out_file)

In [11]:
# Keep only the yearly totals, as an independent single-column frame.
df_totals = df.loc[:, ['total']].copy()

In [12]:
# Display the yearly totals (still indexed by year at this point).
df_totals

Unnamed: 0,total
1981,91019.0
1982,91927.0
1983,97203.0
1984,102273.0
1985,100928.0
1986,105059.0
1987,101515.0
1988,99949.0
1989,97830.0
1990,93116.0


In [13]:
# Persist the yearly totals while the year is still the index.
per_year_pickle = 'archive_exports/total_articles_per_year.pickle'
with open(per_year_pickle, 'wb') as out_file:
    pickle.dump(df_totals, out_file)

In [14]:
# Move the year out of the index into a regular 'index' column.
# Derived from `df` rather than mutating `df_totals` in place: an in-place
# reset_index is not repeatable (a second run either fails because 'index'
# already exists or inserts a stray positional 'index' column).
df_totals = df[['total']].reset_index()

In [15]:
# Name the year column 'pub_year' so it can serve as the merge key below.
df_totals = df_totals.rename(columns={'index': 'pub_year'})

In [16]:
# Preview the first rows: 'pub_year' and 'total' columns.
df_totals.head()

Unnamed: 0,pub_year,total
0,1981,91019.0
1,1982,91927.0
2,1983,97203.0
3,1984,102273.0
4,1985,100928.0


## Combine with Mental Health Article Counts

In [17]:
# NOTE: unpickling runs whatever code the file embeds, so only load pickles
# that were produced by this project.
raw_data_pickle = 'pickled_files/raw_data.pickle'
with open(raw_data_pickle, 'rb') as in_file:
    raw = pickle.load(in_file)

In [18]:
# Keep only the 'pub_date' field of the loaded data, as a DataFrame.
# NOTE(review): presumably `raw` is keyed by article URL (the preview below
# shows URLs as the index) — confirm against the code that wrote the pickle.
raw = pd.DataFrame(raw['pub_date'])

In [19]:
# The year is the first four characters of the 'pub_date' string.
# The vectorized .str accessor replaces the Python-level loop; a missing date
# yields NaN here rather than raising a TypeError part-way through the cell.
raw['pub_year'] = raw['pub_date'].str[:4]

In [20]:
# Preview the first rows: 'pub_date' and the derived 'pub_year'.
raw.head()

Unnamed: 0,pub_date,pub_year
https://lens.blogs.nytimes.com/2017/06/21/handicapped-but-no-longer-invisible-andres-millan/,2017-06-21,2017
https://lens.blogs.nytimes.com/2017/08/22/combat-photographer-marine-ptsd-book/,2017-08-22,2017
https://www.nytimes.com/2017/06/01/nyregion/bronx-police-shooting-mental-illness.html,2017-06-02,2017
https://www.nytimes.com/2017/06/01/world/canada/nurse-killings-insulin.html,2017-06-02,2017
https://www.nytimes.com/2017/06/02/learning/editorial-contest-winner-the-anguish-of-the-rich.html,2017-06-02,2017


In [21]:
# Count rows per publication year; 'pub_year' stays a regular column.
raw_counts = raw.groupby(by='pub_year', as_index=False).agg('count')

In [22]:
# The per-year count of 'pub_date' values is the number of mental-health articles.
raw_counts = raw_counts.rename(columns={'pub_date': 'mental_health'})

In [23]:
# Inner join on the year: only years present in both tables are kept.
mental_health_share = pd.merge(raw_counts, df_totals, how='inner', on='pub_year')

In [24]:
# Fraction of each year's articles that are mental-health articles.
# NOTE(review): get_all_articles stops after 2018/2, so the 2018 total covers
# two months only — confirm the 2018 mental-health count covers the same span.
mental_health_share['share'] = mental_health_share['mental_health'].div(
    mental_health_share['total']
)

In [25]:
# Same ratio expressed in percent (multiply first, then divide, as before).
mental_health_share['percentage'] = (
    mental_health_share['mental_health'].mul(100).div(mental_health_share['total'])
)

In [26]:
# Preview the merged table with the 'share' and 'percentage' columns.
mental_health_share.head()

Unnamed: 0,pub_year,mental_health,total,share,percentage
0,1981,742,91019.0,0.008152,0.815214
1,1982,705,91927.0,0.007669,0.766913
2,1983,570,97203.0,0.005864,0.586402
3,1984,553,102273.0,0.005407,0.54071
4,1985,547,100928.0,0.00542,0.541971


In [27]:
# Persist the merged share table for later use.
share_pickle = 'archive_exports/mental_health_share_over_time.pickle'
with open(share_pickle, 'wb') as out_file:
    pickle.dump(mental_health_share, out_file)

In [28]:
# Export the same table as CSV. Comma separator, header row and all columns
# are the to_csv defaults; the row index is written as the first column.
share_csv = 'archive_exports/mental_health_share_over_time.csv'
mental_health_share.to_csv(share_csv)