/
Keep_only_tweets_in_time_range.py
38 lines (31 loc) · 1.38 KB
/
Keep_only_tweets_in_time_range.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# -*- coding: utf-8 -*-
"""
@author: Iulia Cioroianu
Last modified: April 15, 2014
Purpose: Only keep tweets between 05/06/2012 and 11/07/2012 (MM/DD/YYYY, i.e. after 2012-05-06 00:00:00 and before 2012-11-07 00:00:00)
Input data: Candidate tweets, one .csv per candidate, one tweet per line
Output data: Candidate tweets in time range, one .csv per candidate, one tweet per line
"""
import re
import csv
import glob, os
#############################################################
#Keep only relevant tweets
#############################################################
from datetime import datetime
# Only keep tweets strictly between 2012-05-06 00:00:00 and 2012-11-07 00:00:00.
# The bounds use plain decimal literals: leading-zero forms such as 05 are
# octal in Python 2 and a SyntaxError in Python 3.
start_date = datetime(2012, 5, 6)
end_date = datetime(2012, 11, 7)

INPUT_PATTERN = 'C:/Data/Candidate_tweets/Processing_tweets/*.csv'
OUTPUT_DIR = 'C:/Data/Candidate_tweets/Processing_tweets/Shortened_candidates'
# Layout of the "created_at" column (second field of every tweet row).
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'


def is_in_time_range(row, start=start_date, end=end_date):
    """Return True if a CSV row holds a tweet created strictly inside the range.

    Args:
        row: List of fields for one CSV line; the creation timestamp is
            read from the second field (index 1).
        start: Exclusive lower bound (datetime).
        end: Exclusive upper bound (datetime).

    Returns:
        False for the header row (second field equal to "created_at") and
        for rows with fewer than two fields (e.g. blank lines); otherwise
        whether start < created_at < end.

    Raises:
        ValueError: If the second field does not match DATE_FORMAT.
    """
    # Blank or truncated lines have no timestamp to test; indexing row[1]
    # on them would raise IndexError.
    if len(row) < 2 or row[1] == "created_at":
        return False
    created = datetime.strptime(row[1], DATE_FORMAT)
    return start < created < end


def keep_tweets_in_time_range(input_path, output_path,
                              start=start_date, end=end_date):
    """Copy the rows of one candidate CSV that fall inside the time range.

    The header row is not copied, so the output contains one tweet per line.

    Args:
        input_path: Path of the candidate's tweet CSV.
        output_path: Path of the CSV to write (overwritten if it exists).
        start: Exclusive lower bound (datetime).
        end: Exclusive upper bound (datetime).

    Returns:
        The number of rows written.
    """
    kept = 0
    with open(input_path, 'r') as cand_input:
        with open(output_path, 'w') as cand_output:
            writer = csv.writer(cand_output, lineterminator='\n')
            for row in csv.reader(cand_input):
                if is_in_time_range(row, start, end):
                    writer.writerow(row)
                    kept += 1
    return kept


thepath = glob.glob(INPUT_PATTERN)
# Create the output folder only when there is something to write into it.
if thepath and not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
for input_path in thepath:
    base = os.path.basename(input_path)
    filename = os.path.splitext(base)[0]
    # "<candidate>_tweets.csv" -> "<candidate>.csv"
    file_name = filename.replace('_tweets', '')
    keep_tweets_in_time_range(input_path, '%s/%s.csv' % (OUTPUT_DIR, file_name))
################################################################