In [1]:
# Imports 
import pandas as pd

Query used to get weekly data from [Stackoverflow Query](https://data.stackexchange.com/stackoverflow/query/edit/1205218)

```SQL
SELECT Id, CreationDate, Title, Body, Tags, ViewCount, AnswerCount, CommentCount
FROM Posts
WHERE (CreationDate BETWEEN '2019-01-14' AND '2019-01-20') AND Tags IS NOT NULL
ORDER BY CreationDate ASC
```

# Notebook Overview: 
- Combine weekly .csv's into a combined .csv for the year of 2019 
- The following are links to the zipped versions of the weekly .csv files:
    - [Weeks 1 - 20](https://drive.google.com/open?id=1uttbh17hfhnfMLqnajbw3yX7LC27yQFY)
    - [Weeks 21-40](https://drive.google.com/open?id=1Isrpl4XX-Sv3CDmI2eNI26fX8Hu98mt4)
    - [Weeks 41-52](https://drive.google.com/open?id=15FEnPmx_LaP9_xpxB3zz6BMqFDgTbVc6)
- [Combined 2019 data (zipped):](https://drive.google.com/open?id=1ZQHpysA5m8aL3bbMEiUJLdHmVDhZmaBq)
 

## Function

In [4]:
def combine_weekly(n_weeks = 52):
    '''
    Function to combine weekly .csv files into 
    one combined .csv file
    
    Note: For the function to work you will need to download the  
    individual .csv files (linked above) and follow the listed 
    file path: 
    '../data/individual_weekly/[week#.csv]'
    
    '''
    start = 1 
    weekly_list = []
    
    for i in range(start, n_weeks + 1):
        week_i = pd.read_csv(f'../data/individual_weekly/week{i}.csv')
        weekly_list.append(week_i)
    
    combined_csv = pd.concat(weekly_list)
    return combined_csv  

In [5]:
combined_csv = combine_weekly(n_weeks = 52)

In [6]:
combined_csv.head()

Unnamed: 0,Id,CreationDate,Title,Body,Tags,ViewCount,AnswerCount,CommentCount
0,53992215,2019-01-01 00:00:04,Using a ScheduledExecutorService to run a task...,<p>I am working on a program that will read da...,<java><executorservice><java-threads>,73,1,3
1,53992219,2019-01-01 00:01:55,How to programmatically change style sheet of ...,<p>I have so many buttons on a dialog and I wa...,<c++><qt><qt5><qtstylesheets><qpushbutton>,775,2,2
2,53992220,2019-01-01 00:02:08,Trying to put website into Maintenance Mode (3...,<p>I'm trying to put my webpage into Maintenan...,<.htaccess>,364,2,13
3,53992221,2019-01-01 00:02:27,Node.js Lambda Async return Undefined,<p>Simple call to ec2 Describing Security grou...,<node.js><lambda>,423,3,3
4,53992223,2019-01-01 00:02:37,Unable to print a class list attribute using i...,<p>I am designing a deck class that has <stron...,<python><python-3.x><list><class><printing>,40,2,0


In [7]:
combined_csv.tail()

Unnamed: 0,Id,CreationDate,Title,Body,Tags,ViewCount,AnswerCount,CommentCount
32608,59538122,2019-12-30 23:58:11,Bootstrap - Nav bar not working correctly,"<p>In my project, I implement bootstrap. I als...",<html><twitter-bootstrap><bootstrap-4>,33,0,9
32609,59538123,2019-12-30 23:58:19,DOMXpath query returns nothing and I can't fin...,<p>I have the following content in a wordpress...,<php><html><wordpress><dom><xpath>,36,1,0
32610,59538126,2019-12-30 23:58:27,Excel Formula to sum 12 most recent cells,<p>I have an Excel formula question that I was...,<excel><excel-formula><sum>,71,3,4
32611,59538127,2019-12-30 23:58:30,How do I divide my screen in 2 equal parts usi...,<p>I would like to divide my screen in two hal...,<android><android-constraintlayout>,50,1,1
32612,59538132,2019-12-30 23:59:28,HttpContext is NULL when running web app in IIS,"<p>I have two application, both running on the...",<asp.net-core><iis><identityserver4><blazor-se...,159,1,3


In [8]:
combined_csv.shape

(1878570, 8)

In [9]:
# Save combined csv to data folder 
# will use this file in the next notbook: 02 preprocessing_cleaning 
combined_csv.to_csv('../data/weekly_combined2019.csv', index=False)