# MongoDB Load CSV with python

In [12]:
%matplotlib inline
import pymongo
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
from bson import json_util, ObjectId
import pandas as pd
from pandas import DataFrame
from pandas.io.json import json_normalize
import numpy as np
import requests
import json, os
import configparser
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
import warnings
import random
import pprint
from datetime import datetime
random.seed(datetime.now())
warnings.filterwarnings('ignore')

# Make plots larger
plt.rcParams['figure.figsize'] = (10, 6)

In [13]:
config = configparser.ConfigParser()
config.read('./data/mongo.ini')

['./data/mongo.ini']

In [14]:
client = MongoClient(config.get('Mongo', 'host'), int(config.get('Mongo', 'port')))

In [15]:
db=client.tweets

In [16]:
db.collection_names(include_system_collections=False)

[]

In [17]:
try:
    result = db.tweets.drop()
    print ("analytics tweets dropped")
except:
    pass

analytics tweets dropped


In [18]:
def load_csv(csv):
    p=os.path.join("data/", csv)
    print (p)
    data=pd.read_csv(p, encoding = "ISO-8859-1", engine='python')
    return data 

In [19]:
tweets_csv=load_csv('Twitter.csv')
tweets_csv.head()

data/Twitter.csv


Unnamed: 0,id,created_at,retweet_count,user_screen_name,user_followers_count,user_location,hashtags,text
0,967184683178930000,2018-02-23 23:49:32,36,1AvgGirl,45,"California, USA",[],RT @netflix: When Letitia Wright gets behind t...
1,967184696890352000,2018-02-23 23:49:35,1,sadiepeaches,5225,cabin 4 ? i was here too. VM,[],RT @its_willyu: STOP. WRITING. AROUND. US. @ne...
2,967184759792263000,2018-02-23 23:49:50,0,csgoKW,76,"Cape Town, South Africa",[],@lostinspacetv @netflix Looks Amazing! Can?t w...
3,967184785310409000,2018-02-23 23:49:56,0,MaxContrerasR,177,Room #427,[],@joelmchale @netflix @NetflixLAT Siempre quere...
4,967184841467974000,2018-02-23 23:50:10,0,WAVEDUPZO,5195,WESTCOAST ?? HOUSTON,"[ ""Duh""]",I think @netflix should buy the rights to ?Pop...


In [20]:
tweets_j=json.loads(tweets_csv.to_json(orient='records'))
tweets_j[0]

{'created_at': '2018-02-23 23:49:32',
 'hashtags': '[]',
 'id': 967184683178930000,
 'retweet_count': 36,
 'text': 'RT @netflix: When Letitia Wright gets behind the wheel, you know something badass is about to happen.',
 'user_followers_count': 45,
 'user_location': 'California, USA',
 'user_screen_name': '1AvgGirl'}

In [21]:
print(tweets_j[0]['text'])

RT @netflix: When Letitia Wright gets behind the wheel, you know something badass is about to happen.


In [22]:
def tweet_json(tid,text,created):
    j={
    "tweet_id" : tid,
    "text" : text,
    "created_at" : created}
    return j

In [31]:
# Twitter dates are of the form Sun Mar 15 21:41:54 +0000 2015
datestrings=['Sun Mar 15 21:41:54 +0000 2015','Tue Mar 29 08:11:25 +0000 2011']
datestringsTwo=['2018-02-23 23:49:32','2018-02-23 23:49:32']
from datetime import timedelta
from email.utils import parsedate_tz
from dateutil.parser import parse

def to_datetime(datestring):   
    time_tuple = parse(datestring.strip())   
    dt = datetime.strptime(datestring.strip(), '%Y-%m-%d %H:%M:%S') 
    return dt

ts=to_datetime(datestringsTwo[0])
print (ts.strftime("%Y-%m-%d %H:%M"))
ts=to_datetime(datestringsTwo[1])
print (ts.strftime("%Y-%m-%d %H:%M"))

2018-02-23 23:49
2018-02-23 23:49


In [32]:
cnt=0
for tweet in tweets_j:
    ts=datetime.now()
    try:
        ts=to_datetime(tweet['created_at'])
    except:
        continue     
    try:
        j=tweet_json(tweet['id'],tweet['text'],ts)
        result = db.tweets.insert_one(j)
        cnt+=1
    except:
        pass       
print ("%d tweets inserted."%cnt)

534 tweets inserted.


In [33]:
for tweet in db.tweets.find():
    pprint.pprint(tweet)

{'_id': ObjectId('5ac44aafa313fc07c984ebac'),
 'created_at': datetime.datetime(2018, 2, 23, 23, 49, 32),
 'text': 'RT @netflix: When Letitia Wright gets behind the wheel, you know '
         'something badass is about to happen.',
 'tweet_id': 967184683178930000}
{'_id': ObjectId('5ac44aafa313fc07c984ebad'),
 'created_at': datetime.datetime(2018, 2, 23, 23, 49, 35),
 'text': 'RT @its_willyu: STOP. WRITING. AROUND. US. @netflix',
 'tweet_id': 967184696890352000}
{'_id': ObjectId('5ac44aafa313fc07c984ebae'),
 'created_at': datetime.datetime(2018, 2, 23, 23, 49, 50),
 'text': '@lostinspacetv @netflix Looks Amazing! Can?t wait!',
 'tweet_id': 967184759792263000}
{'_id': ObjectId('5ac44aafa313fc07c984ebaf'),
 'created_at': datetime.datetime(2018, 2, 23, 23, 49, 56),
 'text': '@joelmchale @netflix @NetflixLAT Siempre queremos m\xa0s!',
 'tweet_id': 967184785310409000}
{'_id': ObjectId('5ac44aafa313fc07c984ebb0'),
 'created_at': datetime.datetime(2018, 2, 23, 23, 50, 10),
 'text': 'I think @n

{'_id': ObjectId('5ac44ab0a313fc07c984ec47'),
 'created_at': datetime.datetime(2018, 2, 24, 23, 46, 14),
 'text': '@coltsmith2714 @SavageTV1 @LeonardRaymundo @eveirecarolina '
         '@TommyKeavy @its_willyu @netflix I feel you on that. Tha?',
 'tweet_id': 967546242032198000}
{'_id': ObjectId('5ac44ab0a313fc07c984ec48'),
 'created_at': datetime.datetime(2018, 2, 24, 23, 46, 16),
 'text': "@netflix What did I just watch, and why does this exist? I'm so "
         'beyond confused??',
 'tweet_id': 967546249770660000}
{'_id': ObjectId('5ac44ab0a313fc07c984ec49'),
 'created_at': datetime.datetime(2018, 2, 24, 23, 46, 18),
 'text': 'Mis amores, ya vieron #Eldesconocido por @netflix No se pierdan la '
         'vida de Karla en esta historia ???',
 'tweet_id': 967546257093906000}
{'_id': ObjectId('5ac44ab0a313fc07c984ec4a'),
 'created_at': datetime.datetime(2018, 2, 24, 23, 46, 23),
 'text': 'RT @billburr: It?s about f***ing time! #FIsForFamily is back May 30 '
         'on @Netflix.',
 't

 'tweet_id': 967905783332721000}
{'_id': ObjectId('5ac44ab0a313fc07c984ed08'),
 'created_at': datetime.datetime(2018, 2, 25, 23, 34, 56),
 'text': 'RT @SkylarAstin: Living for this reboot rn tbh?? #canubelieve!? '
         '@QueerEye @netflix #repost @therealannacamp ??',
 'tweet_id': 967905784431632000}
{'_id': ObjectId('5ac44ab0a313fc07c984ed09'),
 'created_at': datetime.datetime(2018, 2, 25, 23, 34, 58),
 'text': '@moworldwide @TiffanyHaddish @AphroditeYoung @netflix @amyschumer '
         'Your reply was everything.',
 'tweet_id': 967905791935214000}
{'_id': ObjectId('5ac44ab0a313fc07c984ed0a'),
 'created_at': datetime.datetime(2018, 2, 25, 23, 35, 7),
 'text': 'RT @moworldwide: @TiffanyHaddish @AphroditeYoung @netflix '
         "@amyschumer You've done nothing wrong my sister. You KEEP SHINING! I "
         'am proud of?',
 'tweet_id': 967905830560522000}
{'_id': ObjectId('5ac44ab0a313fc07c984ed0b'),
 'created_at': datetime.datetime(2018, 2, 25, 23, 35, 8),
 'text': 'Trying to co

Updated October 3, 2017