# Reddit: Concatenate data

- <a href="https://www.reddit.com/r/inthenews">/r/inthenews</a>
- <a href="https://www.reddit.com/r/news">/r/news</a>
- <a href="https://www.reddit.com/r/savedyouaclick">/r/savedyouaclick</a>

In [4]:
import os
import pandas as pd

## Set input folder

In [5]:
from pprint import pprint  # used below; not imported anywhere else in this file

# Root folder of the raw pushshift exports; get_items() reads the
# per-subreddit sub-folders from here and writes its combined CSVs here.
data_in = '../data/00_raw/pushshift/'

# Show what is currently in the input folder.
pprint(sorted(os.listdir(data_in)))

['inthenews',
 'inthenews_all.csv',
 'inthenews_aoi.csv',
 'news',
 'news_gt1_all.csv',
 'news_gt1_aoi.csv',
 'savedyouaclick',
 'savedyouaclick_all.csv',
 'savedyouaclick_aoi.csv']


## Choose attributes of interest

In [5]:
# Columns kept in the reduced "<subreddit>_aoi.csv" output.
aoi = ['created_utc', 'num_comments', 'score', 'title']

## Functions

In [21]:
def get_items(subreddit, score=-1):
    """Concatenate a subreddit's CSV files and write the combined results.

    Every file in ``<data_in>/<subreddit>`` is read in sorted name order,
    rows are kept only where ``score`` is strictly greater than *score*,
    and the per-file row count is printed.  The filtered frames are then
    concatenated and written twice into ``data_in``:

    * ``<subreddit>_all.csv`` -- all columns
    * ``<subreddit>_aoi.csv`` -- only the columns listed in the
      module-level ``aoi``

    Args:
        subreddit: Name of the sub-folder of ``data_in`` to process; also
            used as the prefix of the output file names.
        score: Exclusive lower bound on the ``score`` column.  The default
            of -1 keeps every row whose score is 0 or higher.

    Returns:
        None.  Results are written to disk and progress is printed.
    """
    src_dir = os.path.join(data_in, subreddit)
    dfs = []
    total = 0

    for file in sorted(os.listdir(src_dir)):
        # Each input file is expected to carry its saved index in the
        # unnamed first column.
        df = pd.read_csv(os.path.join(src_dir, file),
                         index_col='Unnamed: 0')
        df = df[df['score'] > score]
        dfs.append(df)

        total += df.shape[0]
        # Was `df_shape[0]` (undefined name -> NameError on the first file).
        print(f'{file:30s} ... {df.shape[0]:>6d}')

    # ignore_index renumbers rows 0..n-1; sort=True orders the columns.
    df = pd.concat(dfs, sort=True, ignore_index=True)
    df.to_csv(os.path.join(data_in, f'{subreddit}_all.csv'))
    df[aoi].to_csv(os.path.join(data_in, f'{subreddit}_aoi.csv'))

    print(f'\nTotal = {total}\n')

<hr style="height: 5px" />

## /r/inthenews

In [22]:
# Combine all /r/inthenews files using the default score threshold.
get_items(subreddit='inthenews')

inthenews_2010.csv             ...      3
inthenews_2011.csv             ...      6
inthenews_2012.csv             ...   1623


  if self.run_code(code, result):


inthenews_2013.csv             ...  15942


  if self.run_code(code, result):


inthenews_2014.csv             ...  22038


  if self.run_code(code, result):


inthenews_2015.csv             ...  25281


  if self.run_code(code, result):


inthenews_2016.csv             ...  24615


  if self.run_code(code, result):


inthenews_2017.csv             ...  25058


  if self.run_code(code, result):


inthenews_2018.csv             ...  11096

Total = 125662



<hr style="height: 5px" />

## /r/news

### Get only the items where <code>score > 1</code>

In [18]:
# Combine all /r/news files, keeping only rows with score > 1.
get_items(subreddit='news', score=1)

  interactivity=interactivity, compiler=compiler, result=result)


news_2018.csv                  ...  21614
news_cycle_00001.csv           ...    140
news_cycle_00002.csv           ...    103
news_cycle_00003.csv           ...    141
news_cycle_00004.csv           ...     61
news_cycle_00005.csv           ...     99
news_cycle_00006.csv           ...     96
news_cycle_00007.csv           ...     58
news_cycle_00008.csv           ...     97
news_cycle_00009.csv           ...     27
news_cycle_00010.csv           ...     75
news_cycle_00011.csv           ...    115
news_cycle_00012.csv           ...    109
news_cycle_00013.csv           ...    139
news_cycle_00014.csv           ...     37
news_cycle_00015.csv           ...    132
news_cycle_00016.csv           ...     33
news_cycle_00017.csv           ...    125
news_cycle_00018.csv           ...     47
news_cycle_00019.csv           ...    114
news_cycle_00020.csv           ...     65
news_cycle_00021.csv           ...     74
news_cycle_00022.csv           ...    113
news_cycle_00023.csv           ...

news_cycle_00201.csv           ...    173
news_cycle_00202.csv           ...     74
news_cycle_00203.csv           ...    156
news_cycle_00204.csv           ...     62
news_cycle_00205.csv           ...    110
news_cycle_00206.csv           ...    164
news_cycle_00207.csv           ...    115
news_cycle_00208.csv           ...    198
news_cycle_00209.csv           ...    110
news_cycle_00210.csv           ...    188
news_cycle_00211.csv           ...     47
news_cycle_00212.csv           ...    193
news_cycle_00213.csv           ...     64
news_cycle_00214.csv           ...    178
news_cycle_00215.csv           ...    100
news_cycle_00216.csv           ...    108
news_cycle_00217.csv           ...     90
news_cycle_00218.csv           ...    135
news_cycle_00219.csv           ...    184
news_cycle_00220.csv           ...     88
news_cycle_00221.csv           ...    191
news_cycle_00222.csv           ...     69
news_cycle_00223.csv           ...    166
news_cycle_00224.csv           ...

news_cycle_00402.csv           ...    131
news_cycle_00403.csv           ...     88
news_cycle_00404.csv           ...    119
news_cycle_00405.csv           ...     77
news_cycle_00406.csv           ...     83
news_cycle_00407.csv           ...     93
news_cycle_00408.csv           ...     42
news_cycle_00409.csv           ...    179
news_cycle_00410.csv           ...     45
news_cycle_00411.csv           ...     98
news_cycle_00412.csv           ...    109
news_cycle_00413.csv           ...     41
news_cycle_00414.csv           ...    150
news_cycle_00415.csv           ...     25
news_cycle_00416.csv           ...     97
news_cycle_00417.csv           ...    115
news_cycle_00418.csv           ...     66
news_cycle_00419.csv           ...    129
news_cycle_00420.csv           ...     39
news_cycle_00421.csv           ...    183
news_cycle_00422.csv           ...     68
news_cycle_00423.csv           ...    139
news_cycle_00424.csv           ...    111
news_cycle_00425.csv           ...

news_cycle_00604.csv           ...     77
news_cycle_00605.csv           ...    113
news_cycle_00606.csv           ...     25
news_cycle_00607.csv           ...    125
news_cycle_00608.csv           ...     80
news_cycle_00609.csv           ...     40
news_cycle_00610.csv           ...    126
news_cycle_00611.csv           ...     56
news_cycle_00612.csv           ...    110
news_cycle_00613.csv           ...     85
news_cycle_00614.csv           ...     90
news_cycle_00615.csv           ...     73
news_cycle_00616.csv           ...     36
news_cycle_00617.csv           ...    138
news_cycle_00618.csv           ...     51
news_cycle_00619.csv           ...     71
news_cycle_00620.csv           ...    125
news_cycle_00621.csv           ...     23
news_cycle_00622.csv           ...    118
news_cycle_00623.csv           ...     94
news_cycle_00624.csv           ...     28
news_cycle_00625.csv           ...     99
news_cycle_00626.csv           ...     60
news_cycle_00627.csv           ...

news_cycle_00808.csv           ...    113
news_cycle_00809.csv           ...     76
news_cycle_00810.csv           ...     52
news_cycle_00811.csv           ...    162
news_cycle_00812.csv           ...     45
news_cycle_00813.csv           ...     97
news_cycle_00814.csv           ...     99
news_cycle_00815.csv           ...     77
news_cycle_00816.csv           ...    156
news_cycle_00817.csv           ...     64
news_cycle_00818.csv           ...     91
news_cycle_00819.csv           ...    184
news_cycle_00820.csv           ...     39
news_cycle_00821.csv           ...    116
news_cycle_00822.csv           ...    128
news_cycle_00823.csv           ...     31
news_cycle_00824.csv           ...    113
news_cycle_00825.csv           ...    104
news_cycle_00826.csv           ...     46
news_cycle_00827.csv           ...    150
news_cycle_00828.csv           ...     36
news_cycle_00829.csv           ...    108
news_cycle_00830.csv           ...    126
news_cycle_00831.csv           ...

news_cycle_01012.csv           ...    163
news_cycle_01013.csv           ...     75
news_cycle_01014.csv           ...    157
news_cycle_01015.csv           ...     81
news_cycle_01016.csv           ...    154
news_cycle_01017.csv           ...    147
news_cycle_01018.csv           ...     99
news_cycle_01019.csv           ...    209
news_cycle_01020.csv           ...     73
news_cycle_01021.csv           ...    158
news_cycle_01022.csv           ...     48
news_cycle_01023.csv           ...    140
news_cycle_01024.csv           ...    130
news_cycle_01025.csv           ...    172
news_cycle_01026.csv           ...     70
news_cycle_01027.csv           ...    148
news_cycle_01028.csv           ...    129
news_cycle_01029.csv           ...     65
news_cycle_01030.csv           ...    162
news_cycle_01031.csv           ...     38
news_cycle_01032.csv           ...    179
news_cycle_01033.csv           ...     47
news_cycle_01034.csv           ...    128
news_cycle_01035.csv           ...

news_cycle_01217.csv           ...    171
news_cycle_01218.csv           ...     66
news_cycle_01219.csv           ...    102
news_cycle_01220.csv           ...    138
news_cycle_01221.csv           ...     54
news_cycle_01222.csv           ...    194
news_cycle_01223.csv           ...     75
news_cycle_01224.csv           ...    112
news_cycle_01225.csv           ...    102
news_cycle_01226.csv           ...     38
news_cycle_01227.csv           ...    203
news_cycle_01228.csv           ...     60
news_cycle_01229.csv           ...    157
news_cycle_01230.csv           ...     64
news_cycle_01231.csv           ...     98
news_cycle_01232.csv           ...     64
news_cycle_01233.csv           ...     83
news_cycle_01234.csv           ...    144
news_cycle_01235.csv           ...     81
news_cycle_01236.csv           ...    155
news_cycle_01237.csv           ...     43
news_cycle_01238.csv           ...    158
news_cycle_01239.csv           ...     67
news_cycle_01240.csv           ...

news_cycle_01424.csv           ...    149
news_cycle_01425.csv           ...    138
news_cycle_01426.csv           ...    156
news_cycle_01427.csv           ...    126
news_cycle_01428.csv           ...    108
news_cycle_01429.csv           ...    187
news_cycle_01430.csv           ...     33
news_cycle_01431.csv           ...    177
news_cycle_01432.csv           ...     85
news_cycle_01433.csv           ...    127
news_cycle_01434.csv           ...    184
news_cycle_01435.csv           ...     50
news_cycle_01436.csv           ...    149
news_cycle_01437.csv           ...     85
news_cycle_01438.csv           ...    121
news_cycle_01439.csv           ...    149
news_cycle_01440.csv           ...     88
news_cycle_01441.csv           ...    166
news_cycle_01442.csv           ...     86
news_cycle_01443.csv           ...    147
news_cycle_01444.csv           ...    154
news_cycle_01445.csv           ...     56
news_cycle_01446.csv           ...    244
news_cycle_01447.csv           ...

news_cycle_01623.csv           ...    197
news_cycle_01624.csv           ...    166
news_cycle_01625.csv           ...     71
news_cycle_01626.csv           ...    224
news_cycle_01627.csv           ...     79
news_cycle_01628.csv           ...    138
news_cycle_01629.csv           ...    145
news_cycle_01630.csv           ...    110
news_cycle_01631.csv           ...    220
news_cycle_01632.csv           ...    116
news_cycle_01633.csv           ...    238
news_cycle_01634.csv           ...    116
news_cycle_01635.csv           ...    237
news_cycle_01636.csv           ...    183
news_cycle_01637.csv           ...    145
news_cycle_01638.csv           ...    160
news_cycle_01639.csv           ...    130
news_cycle_01640.csv           ...    124
news_cycle_01641.csv           ...    174
news_cycle_01642.csv           ...    111
news_cycle_01643.csv           ...    215
news_cycle_01644.csv           ...     73
news_cycle_01645.csv           ...    191
news_cycle_01646.csv           ...

news_cycle_01830.csv           ...    262
news_cycle_01831.csv           ...     63
news_cycle_01832.csv           ...    262
news_cycle_01833.csv           ...    141
news_cycle_01834.csv           ...    266
news_cycle_01835.csv           ...    112
news_cycle_01836.csv           ...    268
news_cycle_01837.csv           ...     76
news_cycle_01838.csv           ...    297
news_cycle_01839.csv           ...     66
news_cycle_01840.csv           ...    293
news_cycle_01841.csv           ...     73
news_cycle_01842.csv           ...    251
news_cycle_01843.csv           ...     67
news_cycle_01844.csv           ...    229
news_cycle_01845.csv           ...    161
news_cycle_01846.csv           ...    211
news_cycle_01847.csv           ...    109
news_cycle_01848.csv           ...    268
news_cycle_01849.csv           ...    108
news_cycle_01850.csv           ...    262
news_cycle_01851.csv           ...    103
news_cycle_01852.csv           ...    261
news_cycle_01853.csv           ...

news_cycle_02033.csv           ...    249
news_cycle_02034.csv           ...    102
news_cycle_02035.csv           ...    205
news_cycle_02036.csv           ...    187
news_cycle_02037.csv           ...    155
news_cycle_02038.csv           ...    184
news_cycle_02039.csv           ...    178
news_cycle_02040.csv           ...    210
news_cycle_02041.csv           ...     98
news_cycle_02042.csv           ...    176
news_cycle_02043.csv           ...     65
news_cycle_02044.csv           ...    167
news_cycle_02045.csv           ...     82
news_cycle_02046.csv           ...    221
news_cycle_02047.csv           ...    174
news_cycle_02048.csv           ...    230
news_cycle_02049.csv           ...    181
news_cycle_02050.csv           ...    165
news_cycle_02051.csv           ...    200
news_cycle_02052.csv           ...    122
news_cycle_02053.csv           ...    215
news_cycle_02054.csv           ...    132
news_cycle_02055.csv           ...    222
news_cycle_02056.csv           ...

news_cycle_02238.csv           ...    264
news_cycle_02239.csv           ...    145
news_cycle_02240.csv           ...    183
news_cycle_02241.csv           ...    258
news_cycle_02242.csv           ...    113
news_cycle_02243.csv           ...    171
news_cycle_02244.csv           ...    214
news_cycle_02245.csv           ...    144
news_cycle_02246.csv           ...    177
news_cycle_02247.csv           ...    190
news_cycle_02248.csv           ...    138
news_cycle_02249.csv           ...    188
news_cycle_02250.csv           ...    172
news_cycle_02251.csv           ...    102
news_cycle_02252.csv           ...    166
news_cycle_02253.csv           ...    245
news_cycle_02254.csv           ...    110
news_cycle_02255.csv           ...    260
news_cycle_02256.csv           ...    132
news_cycle_02257.csv           ...    158
news_cycle_02258.csv           ...    160
news_cycle_02259.csv           ...    177
news_cycle_02260.csv           ...    274
news_cycle_02261.csv           ...

news_cycle_02442.csv           ...    188
news_cycle_02443.csv           ...    179
news_cycle_02444.csv           ...    138
news_cycle_02445.csv           ...    165
news_cycle_02446.csv           ...    189
news_cycle_02447.csv           ...    135
news_cycle_02448.csv           ...    139
news_cycle_02449.csv           ...    233
news_cycle_02450.csv           ...    208
news_cycle_02451.csv           ...    204
news_cycle_02452.csv           ...    235
news_cycle_02453.csv           ...    132
news_cycle_02454.csv           ...    131
news_cycle_02455.csv           ...    206
news_cycle_02456.csv           ...    166
news_cycle_02457.csv           ...    132
news_cycle_02458.csv           ...    230
news_cycle_02459.csv           ...    210
news_cycle_02460.csv           ...    141
news_cycle_02461.csv           ...    209
news_cycle_02462.csv           ...    216
news_cycle_02463.csv           ...    134
news_cycle_02464.csv           ...    167
news_cycle_02465.csv           ...

news_cycle_02645.csv           ...    198
news_cycle_02646.csv           ...    192
news_cycle_02647.csv           ...    201
news_cycle_02648.csv           ...    188
news_cycle_02649.csv           ...    199
news_cycle_02650.csv           ...    272
news_cycle_02651.csv           ...    148
news_cycle_02652.csv           ...    225
news_cycle_02653.csv           ...    207
news_cycle_02654.csv           ...    113
news_cycle_02655.csv           ...    225
news_cycle_02656.csv           ...    151
news_cycle_02657.csv           ...    175
news_cycle_02658.csv           ...    204
news_cycle_02659.csv           ...    114
news_cycle_02660.csv           ...    190
news_cycle_02661.csv           ...    187
news_cycle_02662.csv           ...    198
news_cycle_02663.csv           ...    218
news_cycle_02664.csv           ...    130
news_cycle_02665.csv           ...    250
news_cycle_02666.csv           ...    222
news_cycle_02667.csv           ...    147
news_cycle_02668.csv           ...

news_cycle_02847.csv           ...    221
news_cycle_02848.csv           ...    218
news_cycle_02849.csv           ...    221
news_cycle_02850.csv           ...    155
news_cycle_02851.csv           ...    161
news_cycle_02852.csv           ...    275
news_cycle_02853.csv           ...    209
news_cycle_02854.csv           ...    222
news_cycle_02855.csv           ...    287
news_cycle_02856.csv           ...    209
news_cycle_02857.csv           ...    211
news_cycle_02858.csv           ...    230
news_cycle_02859.csv           ...    160
news_cycle_02860.csv           ...    200
news_cycle_02861.csv           ...    263
news_cycle_02862.csv           ...    259
news_cycle_02863.csv           ...    275
news_cycle_02864.csv           ...    219
news_cycle_02865.csv           ...    265
news_cycle_02866.csv           ...    223
news_cycle_02867.csv           ...    238
news_cycle_02868.csv           ...    239
news_cycle_02869.csv           ...    196
news_cycle_02870.csv           ...

news_cycle_03044.csv           ...    208
news_cycle_03045.csv           ...    243
news_cycle_03046.csv           ...    252
news_cycle_03047.csv           ...    195
news_cycle_03048.csv           ...    267
news_cycle_03049.csv           ...    346
news_cycle_03050.csv           ...    306
news_cycle_03051.csv           ...    246
news_cycle_03052.csv           ...    312
news_cycle_03053.csv           ...    304
news_cycle_03054.csv           ...    272
news_cycle_03055.csv           ...    201
news_cycle_03056.csv           ...    268
news_cycle_03057.csv           ...    308
news_cycle_03058.csv           ...    280
news_cycle_03059.csv           ...    312
news_cycle_03060.csv           ...    272
news_cycle_03061.csv           ...    258
news_cycle_03062.csv           ...    253
news_cycle_03063.csv           ...    313
news_cycle_03064.csv           ...    244
news_cycle_03065.csv           ...    357
news_cycle_03066.csv           ...    398
news_cycle_03067.csv           ...

news_cycle_03241.csv           ...    325
news_cycle_03242.csv           ...    294
news_cycle_03243.csv           ...    324
news_cycle_03244.csv           ...    354
news_cycle_03245.csv           ...    353
news_cycle_03246.csv           ...    349
news_cycle_03247.csv           ...    323
news_cycle_03248.csv           ...    370
news_cycle_03249.csv           ...    451
news_cycle_03250.csv           ...    454
news_cycle_03251.csv           ...    318
news_cycle_03252.csv           ...    376
news_cycle_03253.csv           ...    431
news_cycle_03254.csv           ...    434
news_cycle_03255.csv           ...    432
news_cycle_03256.csv           ...    420
news_cycle_03257.csv           ...    412
news_cycle_03258.csv           ...    341
news_cycle_03259.csv           ...    368
news_cycle_03260.csv           ...    355
news_cycle_03261.csv           ...    393
news_cycle_03262.csv           ...    347
news_cycle_03263.csv           ...    494
news_cycle_03264.csv           ...

<hr style="height: 5px" />

## /r/savedyouaclick

In [20]:
# Combine all /r/savedyouaclick files using the default score threshold.
get_items(subreddit='savedyouaclick')

savedyouaclick_2014.csv        ...      1
savedyouaclick_2015.csv        ...      2
savedyouaclick_2016.csv        ...   7292


  if self.run_code(code, result):


savedyouaclick_2017.csv        ...   8424
savedyouaclick_2018.csv        ...   3492

Total = 19211
