In [1]:
import csv
def generate_formatted_csv():
    raw_file_path = "/Users/zhourb21/zhazha/project/ows-raw"
    file = open('/Users/zhourb21/zhazha/project/ows-raw-formated', 'w+')
    writer = csv.writer(file)

    with open(raw_file_path, 'rb') as csvfile:
        lines = csv.reader( (line.replace('\0','') for line in csvfile), delimiter=',', quotechar='"' )
        counter = 0
        for line in lines:
            count = len(line)
            if count == 14:
                writer.writerow(line)
            counter += 1
generate_formatted_csv()
In [2]:
def save_by_date():
    path = "/Users/zhourb21/zhazha/project/ows-raw-formatted"
    with open(path, "rb") as f:
        lines = csv.reader(f, delimiter=',', quotechar='"')
        date_tweet_map = {}

        counter = 0
        for line in lines:
            counter += 1
            if counter == 1:
                continue
            day = line[3]
            if day in date_tweet_map:
                date_tweet_map[day].append(line)
            else:
                date_tweet_map[day] = [line]

        days = date_tweet_map.keys()

        statistic = open('/Users/zhourb21/zhazha/project/data-by-day/stat', 'w+')
        for _day in days:
            data = date_tweet_map[_day]
            statistic.write(str(_day) + ": " + str(len(data))+ '\n')
            dayFile = open('/Users/zhourb21/zhazha/project/data-by-day/' + _day, 'w+')
            writer = csv.writer(dayFile)
            for datum in data:
                writer.writerow(datum)
        statistic.close()
save_by_date()
In [3]:
with open('/Users/zhourb21/zhazha/project/data-by-day/stat', 'rb') as f:
    for line in f.readlines():
        print line
2012-01-23: 14896

2011-10-25: 74641

2011-10-24: 64571

2011-12-10: 33987

2011-12-11: 26604

2011-12-12: 50448

2011-12-13: 39982

2011-12-14: 39399

2011-12-15: 36426

2011-12-16: 28107

2011-12-17: 37983

2011-12-18: 29022

2011-12-19: 20893

2012-02-16: 12837

2012-02-17: 12468

2012-02-14: 13580

2012-02-15: 13248

2012-02-12: 11178

2012-02-13: 13600

2012-02-10: 14083

2012-02-11: 12920

2012-02-18: 4859

2011-12-07: 45440

2011-12-06: 46917

2011-12-05: 42429

2011-12-04: 42326

2011-12-03: 35760

2011-12-02: 43135

2011-12-01: 48062

2011-11-20: 99365

2011-11-21: 87818

2011-11-22: 81802

2011-11-23: 61471

2011-11-24: 40522

2011-11-25: 40930

2011-12-09: 43030

2011-12-08: 40733

2012-02-05: 13656

2012-02-04: 16591

2012-02-07: 14940

2012-02-06: 14446

2012-02-01: 16578

2012-02-03: 14371

2012-02-02: 14995

2012-02-09: 13944

2012-02-08: 13451

2011-11-11: 48997

2011-10-31: 64256

2011-11-28: 53505

2012-01-28: 11530

2011-11-29: 40152

2012-01-26: 15159

2012-01-27: 11992

2012-01-24: 14290

2012-01-25: 18812

2012-01-22: 11948

2011-10-29: 75741

2012-01-20: 19840

2012-01-21: 15459

2011-10-28: 87216

2011-11-30: 59289

2011-10-23: 64808

2011-10-22: 70368

2011-10-21: 75256

2011-10-20: 73159

2011-10-27: 137172

2012-01-31: 18682

2012-01-30: 27198

2011-10-26: 141729

2011-11-26: 44523

2011-11-27: 42614

2012-01-01: 33192

2012-01-02: 19529

2012-01-03: 22790

2012-01-04: 16827

2012-01-05: 11227

2012-01-06: 10770

2012-01-07: 14432

2012-01-08: 15209

2012-01-09: 16589

2011-11-01: 62286

2011-10-12: 67753

2011-10-13: 98954

2011-10-10: 78619

2011-10-11: 67596

2011-10-16: 165381

2011-10-17: 113628

2011-10-14: 148062

2011-10-15: 161802

2011-10-18: 103337

2011-10-19: 88874

2012-01-17: 20692

2012-01-16: 15844

2012-01-15: 16264

2012-01-14: 15046

2012-01-13: 15683

2012-01-12: 17705

2012-01-11: 22665

2012-01-10: 16829

2012-01-19: 14533

2012-01-18: 18162

2011-11-06: 55511

2011-11-07: 52820

2011-11-04: 63529

2011-11-05: 65019

2011-11-02: 74554

2011-11-03: 92698

2011-10-07: 65238

2011-10-06: 49638

2011-10-09: 65097

2011-10-08: 65949

2011-11-08: 47453

2011-11-09: 46503

2011-12-30: 17367

2011-12-31: 14692

2011-11-15: 409075

2011-11-14: 59815

2011-11-17: 279349

2011-11-16: 126520

2011-10-30: 77813

2011-11-10: 52664

2011-11-13: 45470

2011-11-12: 38189

2011-11-19: 107747

2011-11-18: 154395

2012-01-29: 27219

2011-12-29: 17100

2011-12-28: 19117

2011-12-25: 10538

2011-12-24: 14196

2011-12-27: 16772

2011-12-26: 12515

2011-12-21: 20091

2011-12-20: 24045

2011-12-23: 19031

2011-12-22: 21972

In [ ]: