### Do required imports

In [34]:
from __future__ import print_function
import tweepy
import json
from pymongo import MongoClient

### Setup your mongo path

In [35]:
MONGO_HOST = 'mongodb://localhost/twitterdb'

### Prepare a search keywords

In [36]:
WORDS =['#bigdata','#AI','#datascience','#machinelearning','#ml','#iot']

### Set up Twitter API tokens and consumder keys

In [37]:
# load credentails from twitter_credentails.json
with open('twitter_crdentials.json','r') as file:
    creds = json.load(file)

### Create a StreamListener Class to scrap twitter API

In [11]:
class StreamListener(tweepy.StreamListener):
    # This is the class provided by tweepy to access twitter Streaming API

    def on_connect(self):
        # called initially to connect to the streaming API
        print("you are connected to the streaming API")
    
    
    def on_error(self,status_code):
        # on error - if an error occurs displays the error / status code
        print("An Error has occured: " + repr(status_code))
        return False
    
    def on_data(self,data):
        # this part is main script where we connect to MongoDB and stores the tweet
        try:
            client = MongoClient(MONGO_HOST)
            
            # use twitterdb database. if it doesn't existed, it will be created
            db= client.twitterdb
            
            # Decode the JSON response from Twitter
            datajson = json.loads(data)
            
            # grab the Created_at data from the tweet to use for display
            created_at = datajson['created_at']
            
            # print out the message on every successful tweet we collected
            print("Tweet collected at " + str(created_at))
            
            # drop db if its existed
            db.twitter_search.drop()
            
            # insert the data into the mongodb collection called twitter_search
            # it twitter_search is not existed, it will be created          
            db.twitter_search.insert(datajson)
        
        except Exception as e:
            print(e)

        
        

In [10]:
 # authentification so we can access twitter
auth = tweepy.OAuthHandler(creds['CONSUMER_KEY'],creds['CONSUMER_SECRET'])
auth.set_access_token(creds['ACCESS_KEY'], creds['ACCESS_SECRET'])
api =tweepy.API(auth, wait_on_rate_limit=True)

# create instance of Streamlistener
listener = StreamListener(api = api)
stream = tweepy.Stream(auth, listener = listener)

#track = ['golf', 'masters', 'reed', 'mcilroy', 'woods']
#track = ['nba', 'cavs', 'celtics', 'basketball']
# choose what we want to filter by
#print('Tracking: ' +str(WORDS))

# create a filter with query and targeted language
stream.filter(track = WORDS,languages=['en'])

you are connected to the streaming API
Tweet collected at Tue Dec 15 20:20:02 +0000 2020
Tweet collected at Tue Dec 15 20:20:02 +0000 2020
Tweet collected at Tue Dec 15 20:20:02 +0000 2020


  db.twitter_search.insert(datajson)


Tweet collected at Tue Dec 15 20:20:05 +0000 2020


KeyboardInterrupt: 

### Read data from mongoDB

In [269]:
import pymongo
import pandas as pd
from pymongo import MongoClient

In [270]:
client = MongoClient("mongodb://localhost:27017")

In [271]:
db = client['twitterdb']

In [272]:
#db.tweets.drop()

In [273]:
tweets = db['twitter_search']

In [274]:
df = pd.DataFrame(list(tweets.find()))

In [275]:
df

Unnamed: 0,_id,created_at,id,id_str,text,source,truncated,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,...,filter_level,lang,timestamp_ms,quoted_status_id,quoted_status_id_str,quoted_status,quoted_status_permalink,retweeted_status,display_text_range,extended_entities
0,5fd3ba11648b265ff4391cd6,Fri Dec 11 18:27:24 +0000 2020,1337464014419480576,1337464014419480576,Interesting... Machine Learning and AI - What ...,"<a href=""http://www.bitechwatch.com"" rel=""nofo...",True,,,,...,low,en,1607711244574,,,,,,,
1,5fd3ba14648b265ff4391cd8,Fri Dec 11 18:27:27 +0000 2020,1337464027719499777,1337464027719499777,Take a look at these open positions in applied...,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,,,,...,low,en,1607711247745,1.337462e+18,1337462337393938432,{'created_at': 'Fri Dec 11 18:20:44 +0000 2020...,"{'url': 'https://t.co/lDfGEjmOYF', 'expanded':...",,,
2,5fd3ba15648b265ff4391cda,Fri Dec 11 18:27:28 +0000 2020,1337464032564027392,1337464032564027392,RT @Xbond49: Honored &amp; humbled to be in th...,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,...,low,en,1607711248900,,,,,{'created_at': 'Fri Dec 11 18:20:23 +0000 2020...,,
3,5fd3ba17648b265ff4391cdc,Fri Dec 11 18:27:30 +0000 2020,1337464037680955392,1337464037680955392,@sciencebase add #IoT data/log with current lo...,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.337436e+18,1337435563348127745,6612402.0,...,low,en,1607711250120,,,,,,"[13, 140]",
4,5fd3ba1a648b265ff4391cde,Fri Dec 11 18:27:33 +0000 2020,1337464049794076672,1337464049794076672,iShares Robotics And Artificial Intelligence M...,"<a href=""https://dlvrit.com/"" rel=""nofollow"">d...",True,,,,...,low,en,1607711253008,,,,,,"[0, 140]",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10784,5fd911b1b5c4c99a8d60d81b,Tue Dec 15 19:42:36 +0000 2020,1338932489549533184,1338932489549533184,RT @stpiindia: #AI can empower manufacturing c...,"<a href=""http://twitter.com/download/android"" ...",False,,,,...,low,en,1608061356328,,,,,{'created_at': 'Wed Dec 09 10:23:59 +0000 2020...,,
10785,5fd91a77b5c4c99a8d60d81d,Tue Dec 15 20:20:02 +0000 2020,1338941911349026820,1338941911349026820,"To battle the effects of COVID-19, the airline...","<a href=""https://coschedule.com"" rel=""nofollow...",True,,,,...,low,en,1608063602660,,,,,,,
10786,5fd91a77b5c4c99a8d60d81f,Tue Dec 15 20:20:02 +0000 2020,1338941910916993024,1338941910916993024,Here is something to think about: As financial...,"<a href=""https://app.sendible.com"" rel=""nofoll...",True,,,,...,low,en,1608063602557,,,,,,"[0, 140]",
10787,5fd91a77b5c4c99a8d60d821,Tue Dec 15 20:20:02 +0000 2020,1338941911516782594,1338941911516782594,Multiple Linear Regression in Machine Learning...,"<a href=""https://buffer.com"" rel=""nofollow"">Bu...",True,,,,...,low,en,1608063602700,,,,,,"[0, 140]",


#### Identify Duplicate rows

In [276]:
duplicates = df[df.duplicated('id')]
duplicates

Unnamed: 0,_id,created_at,id,id_str,text,source,truncated,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,...,filter_level,lang,timestamp_ms,quoted_status_id,quoted_status_id_str,quoted_status,quoted_status_permalink,retweeted_status,display_text_range,extended_entities


In [277]:
# prepare a list to Drop unwanted columns
list_unwanted_col = ['_id','id','truncated','in_reply_to_status_id','in_reply_to_status_id_str','in_reply_to_user_id','in_reply_to_user_id_str',\
                    'in_reply_to_screen_name','geo','coordinates','retweet_count','place','contributors','is_quote_status','extended_tweet','quote_count','reply_count',\
                    'favorite_count','entities','favorited','retweeted','possibly_sensitive','filter_level','timestamp_ms','quoted_status_id',\
                    'quoted_status_id_str','quoted_status','quoted_status_permalink','retweeted_status','display_text_range','extended_entities','created_at', 'source']

# create a new data frame with clean data
new_df = df.drop(list_unwanted_col,axis=1)

In [278]:
new_df.head()

Unnamed: 0,id_str,text,user,lang
0,1337464014419480576,Interesting... Machine Learning and AI - What ...,"{'id': 22631958, 'id_str': '22631958', 'name':...",en
1,1337464027719499777,Take a look at these open positions in applied...,"{'id': 1079825507737202688, 'id_str': '1079825...",en
2,1337464032564027392,RT @Xbond49: Honored &amp; humbled to be in th...,"{'id': 716658880508510208, 'id_str': '71665888...",en
3,1337464037680955392,@sciencebase add #IoT data/log with current lo...,"{'id': 15081182, 'id_str': '15081182', 'name':...",en
4,1337464049794076672,iShares Robotics And Artificial Intelligence M...,"{'id': 856240505826496513, 'id_str': '85624050...",en


### prepare a User_df to hold just user information

In [279]:
user_df = pd.DataFrame.from_dict([new_df['user'][i] for i in range(len(new_df['user']))])

In [280]:
user_df.columns

Index(['id', 'id_str', 'name', 'screen_name', 'location', 'url', 'description',
       'translator_type', 'protected', 'verified', 'followers_count',
       'friends_count', 'listed_count', 'favourites_count', 'statuses_count',
       'created_at', 'utc_offset', 'time_zone', 'geo_enabled', 'lang',
       'contributors_enabled', 'is_translator', 'profile_background_color',
       'profile_background_image_url', 'profile_background_image_url_https',
       'profile_background_tile', 'profile_link_color',
       'profile_sidebar_border_color', 'profile_sidebar_fill_color',
       'profile_text_color', 'profile_use_background_image',
       'profile_image_url', 'profile_image_url_https', 'profile_banner_url',
       'default_profile', 'default_profile_image', 'following',
       'follow_request_sent', 'notifications'],
      dtype='object')

In [281]:
user_df

Unnamed: 0,id,id_str,name,screen_name,location,url,description,translator_type,protected,verified,...,profile_text_color,profile_use_background_image,profile_image_url,profile_image_url_https,profile_banner_url,default_profile,default_profile_image,following,follow_request_sent,notifications
0,22631958,22631958,Chuck Russell,cichuck,"Harrisburg, PA",http://www.collectiveintelligence.com,Founder Collective Intelligence #TheFutureOfW...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1071464844...,https://pbs.twimg.com/profile_images/107146484...,https://pbs.twimg.com/profile_banners/22631958...,False,False,,,
1,1079825507737202688,1079825507737202688,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",http://theaaih.org,Global organization to educate and advocate fo...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1128344835...,https://pbs.twimg.com/profile_images/112834483...,https://pbs.twimg.com/profile_banners/10798255...,False,False,,,
2,716658880508510208,716658880508510208,Edge Technology News,NewsEdgetech,Online,,Technology Social Channel. #EdgeComputing #IoT...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1157563564...,https://pbs.twimg.com/profile_images/115756356...,https://pbs.twimg.com/profile_banners/71665888...,True,False,,,
3,15081182,15081182,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV",http://WarrenWhitlock.com,"Founder/CEO Stirling, publishing, emergingtech...",none,False,True,...,000000,False,http://pbs.twimg.com/profile_images/1058800581...,https://pbs.twimg.com/profile_images/105880058...,https://pbs.twimg.com/profile_banners/15081182...,False,False,,,
4,856240505826496513,856240505826496513,Suriya Subramanian,SuriyaSubraman,"London, UK",https://www.linkedin.com/in/suriyansubramanian/,Data driven Change consultant \nhttp://finperf...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/8746975191...,https://pbs.twimg.com/profile_images/874697519...,,True,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10784,2190078902,2190078902,Bhaskar Dev,vskrdev,"New Delhi, India",,Illustrator and Graphic Designer,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/3788000007...,https://pbs.twimg.com/profile_images/378800000...,https://pbs.twimg.com/profile_banners/21900789...,False,False,,,
10785,776150730721951745,776150730721951745,IoT For All,iotforall,United States,https://www.iotforall.com,http://www.iotforall.com is the world's hub fo...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1095437721...,https://pbs.twimg.com/profile_images/109543772...,https://pbs.twimg.com/profile_banners/77615073...,False,False,,,
10786,217567849,217567849,Matt Reiner,mattreiner,Atlanta,,"CEO, Co-Founder @get_benjamin. Author of ""Read...",none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1039241002...,https://pbs.twimg.com/profile_images/103924100...,https://pbs.twimg.com/profile_banners/21756784...,False,False,,,
10787,1095983634773815296,1095983634773815296,Calsoft Inc,Calsoft_Data,,,Looking to launch new products or roll out upd...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1095985545...,https://pbs.twimg.com/profile_images/109598554...,,True,False,,,


#### Identify Duplicate rows from user_df

In [282]:
user_df.loc[user_df['id'] == 1331034000404647936 ]

Unnamed: 0,id,id_str,name,screen_name,location,url,description,translator_type,protected,verified,...,profile_text_color,profile_use_background_image,profile_image_url,profile_image_url_https,profile_banner_url,default_profile,default_profile_image,following,follow_request_sent,notifications
14,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
16,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
22,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
24,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
777,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
791,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
803,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
808,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
811,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
1856,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,


In [283]:
duplicates = user_df[user_df.duplicated('id')]
duplicates

Unnamed: 0,id,id_str,name,screen_name,location,url,description,translator_type,protected,verified,...,profile_text_color,profile_use_background_image,profile_image_url,profile_image_url_https,profile_banner_url,default_profile,default_profile_image,following,follow_request_sent,notifications
16,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
19,955443550048608257,955443550048608257,nodejs-bot,morolswediu,"Dhaka, Bangladesh",https://jinnatul.github.io/,"I am a bot, develop by @jinnatul_md, to retwee...",none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1336415584...,https://pbs.twimg.com/profile_images/133641558...,https://pbs.twimg.com/profile_banners/95544355...,True,False,,,
21,1277176003840872448,1277176003840872448,CEO TT36 & Friends GmbH,tt36crew,UNITED STATES OF EUROPE,http://www.tt36.de/,#TT36GansterLimitedLiabilitiesClub *Oranienstr...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1296413416...,https://pbs.twimg.com/profile_images/129641341...,,True,False,,,
22,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
24,1331034000404647936,1331034000404647936,MedAI Bot,medaibot,,https://Cardiologytimes.today,Created by Doctor @KrittanawongMD. A bot that ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1331276094...,https://pbs.twimg.com/profile_images/133127609...,https://pbs.twimg.com/profile_banners/13310340...,True,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10782,1260015280048222208,1260015280048222208,BOT Kitty 🐈,BotRaj1,,,A bot who likes and retweet your progress for ...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1298179046...,https://pbs.twimg.com/profile_images/129817904...,https://pbs.twimg.com/profile_banners/12600152...,True,False,,,
10784,2190078902,2190078902,Bhaskar Dev,vskrdev,"New Delhi, India",,Illustrator and Graphic Designer,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/3788000007...,https://pbs.twimg.com/profile_images/378800000...,https://pbs.twimg.com/profile_banners/21900789...,False,False,,,
10785,776150730721951745,776150730721951745,IoT For All,iotforall,United States,https://www.iotforall.com,http://www.iotforall.com is the world's hub fo...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1095437721...,https://pbs.twimg.com/profile_images/109543772...,https://pbs.twimg.com/profile_banners/77615073...,False,False,,,
10787,1095983634773815296,1095983634773815296,Calsoft Inc,Calsoft_Data,,,Looking to launch new products or roll out upd...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1095985545...,https://pbs.twimg.com/profile_images/109598554...,,True,False,,,


#### Drop Duplicate rows from user_df

In [284]:
user_df=user_df.drop_duplicates(subset=['id'])
user_df

Unnamed: 0,id,id_str,name,screen_name,location,url,description,translator_type,protected,verified,...,profile_text_color,profile_use_background_image,profile_image_url,profile_image_url_https,profile_banner_url,default_profile,default_profile_image,following,follow_request_sent,notifications
0,22631958,22631958,Chuck Russell,cichuck,"Harrisburg, PA",http://www.collectiveintelligence.com,Founder Collective Intelligence #TheFutureOfW...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1071464844...,https://pbs.twimg.com/profile_images/107146484...,https://pbs.twimg.com/profile_banners/22631958...,False,False,,,
1,1079825507737202688,1079825507737202688,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",http://theaaih.org,Global organization to educate and advocate fo...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1128344835...,https://pbs.twimg.com/profile_images/112834483...,https://pbs.twimg.com/profile_banners/10798255...,False,False,,,
2,716658880508510208,716658880508510208,Edge Technology News,NewsEdgetech,Online,,Technology Social Channel. #EdgeComputing #IoT...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1157563564...,https://pbs.twimg.com/profile_images/115756356...,https://pbs.twimg.com/profile_banners/71665888...,True,False,,,
3,15081182,15081182,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV",http://WarrenWhitlock.com,"Founder/CEO Stirling, publishing, emergingtech...",none,False,True,...,000000,False,http://pbs.twimg.com/profile_images/1058800581...,https://pbs.twimg.com/profile_images/105880058...,https://pbs.twimg.com/profile_banners/15081182...,False,False,,,
4,856240505826496513,856240505826496513,Suriya Subramanian,SuriyaSubraman,"London, UK",https://www.linkedin.com/in/suriyansubramanian/,Data driven Change consultant \nhttp://finperf...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/8746975191...,https://pbs.twimg.com/profile_images/874697519...,,True,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10779,1250121032616337408,1250121032616337408,Gabbas,GabbasOfficial,United States,,,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1333972162...,https://pbs.twimg.com/profile_images/133397216...,https://pbs.twimg.com/profile_banners/12501210...,True,False,,,
10780,56869329,56869329,OneLMedia™,OneLMediaNow,United States,http://Onelmedia.com,Founder |Creating Healthy Solutions For Entrep...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1282231532...,https://pbs.twimg.com/profile_images/128223153...,https://pbs.twimg.com/profile_banners/56869329...,False,False,,,
10781,3665782757,3665782757,Blogged_Environment,blogged_env,,http://alexandrinepress.co.uk/blogged-environment,Alternative perspectives and blogs from the Bu...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/6724640883...,https://pbs.twimg.com/profile_images/672464088...,https://pbs.twimg.com/profile_banners/36657827...,False,False,,,
10783,1140322534585491457,1140322534585491457,Fusion Winter 🛰️,fusionwinter,Globally Monitored Data Fusion,,A systems scientist's pertinent daily scan of ...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1326959754...,https://pbs.twimg.com/profile_images/132695975...,https://pbs.twimg.com/profile_banners/11403225...,False,False,,,


#### update user_df[id_str] field with new_df[id_str] so we can have a common column between new_df & user_df

In [285]:
user_df['id_str'] = new_df['id_str']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_df['id_str'] = new_df['id_str']


In [286]:
user_df.columns

Index(['id', 'id_str', 'name', 'screen_name', 'location', 'url', 'description',
       'translator_type', 'protected', 'verified', 'followers_count',
       'friends_count', 'listed_count', 'favourites_count', 'statuses_count',
       'created_at', 'utc_offset', 'time_zone', 'geo_enabled', 'lang',
       'contributors_enabled', 'is_translator', 'profile_background_color',
       'profile_background_image_url', 'profile_background_image_url_https',
       'profile_background_tile', 'profile_link_color',
       'profile_sidebar_border_color', 'profile_sidebar_fill_color',
       'profile_text_color', 'profile_use_background_image',
       'profile_image_url', 'profile_image_url_https', 'profile_banner_url',
       'default_profile', 'default_profile_image', 'following',
       'follow_request_sent', 'notifications'],
      dtype='object')

In [287]:
user_df

Unnamed: 0,id,id_str,name,screen_name,location,url,description,translator_type,protected,verified,...,profile_text_color,profile_use_background_image,profile_image_url,profile_image_url_https,profile_banner_url,default_profile,default_profile_image,following,follow_request_sent,notifications
0,22631958,1337464014419480576,Chuck Russell,cichuck,"Harrisburg, PA",http://www.collectiveintelligence.com,Founder Collective Intelligence #TheFutureOfW...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1071464844...,https://pbs.twimg.com/profile_images/107146484...,https://pbs.twimg.com/profile_banners/22631958...,False,False,,,
1,1079825507737202688,1337464027719499777,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",http://theaaih.org,Global organization to educate and advocate fo...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1128344835...,https://pbs.twimg.com/profile_images/112834483...,https://pbs.twimg.com/profile_banners/10798255...,False,False,,,
2,716658880508510208,1337464032564027392,Edge Technology News,NewsEdgetech,Online,,Technology Social Channel. #EdgeComputing #IoT...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1157563564...,https://pbs.twimg.com/profile_images/115756356...,https://pbs.twimg.com/profile_banners/71665888...,True,False,,,
3,15081182,1337464037680955392,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV",http://WarrenWhitlock.com,"Founder/CEO Stirling, publishing, emergingtech...",none,False,True,...,000000,False,http://pbs.twimg.com/profile_images/1058800581...,https://pbs.twimg.com/profile_images/105880058...,https://pbs.twimg.com/profile_banners/15081182...,False,False,,,
4,856240505826496513,1337464049794076672,Suriya Subramanian,SuriyaSubraman,"London, UK",https://www.linkedin.com/in/suriyansubramanian/,Data driven Change consultant \nhttp://finperf...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/8746975191...,https://pbs.twimg.com/profile_images/874697519...,,True,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10779,1250121032616337408,1338932465193390083,Gabbas,GabbasOfficial,United States,,,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1333972162...,https://pbs.twimg.com/profile_images/133397216...,https://pbs.twimg.com/profile_banners/12501210...,True,False,,,
10780,56869329,1338932468779536385,OneLMedia™,OneLMediaNow,United States,http://Onelmedia.com,Founder |Creating Healthy Solutions For Entrep...,none,False,False,...,333333,True,http://pbs.twimg.com/profile_images/1282231532...,https://pbs.twimg.com/profile_images/128223153...,https://pbs.twimg.com/profile_banners/56869329...,False,False,,,
10781,3665782757,1338932477088423941,Blogged_Environment,blogged_env,,http://alexandrinepress.co.uk/blogged-environment,Alternative perspectives and blogs from the Bu...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/6724640883...,https://pbs.twimg.com/profile_images/672464088...,https://pbs.twimg.com/profile_banners/36657827...,False,False,,,
10783,1140322534585491457,1338932488945737728,Fusion Winter 🛰️,fusionwinter,Globally Monitored Data Fusion,,A systems scientist's pertinent daily scan of ...,none,False,False,...,000000,False,http://pbs.twimg.com/profile_images/1326959754...,https://pbs.twimg.com/profile_images/132695975...,https://pbs.twimg.com/profile_banners/11403225...,False,False,,,


### Remove unwanted columms from user_df

In [288]:
user_unwanted_col = ['id', 'url','translator_type','protected','verified','utc_offset','time_zone','geo_enabled',\
                    'lang','contributors_enabled','is_translator','profile_background_color','profile_background_image_url',\
                     'profile_background_image_url_https','profile_background_tile', 'profile_link_color',\
                     'profile_sidebar_border_color', 'profile_sidebar_fill_color','profile_text_color', 'profile_use_background_image',\
                     'profile_banner_url','default_profile', 'default_profile_image', 'following','follow_request_sent', 'notifications','profile_image_url','profile_image_url_https']

In [289]:
clean_user_df = user_df.drop(user_unwanted_col,axis=1)
clean_user_df

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at
0,1337464014419480576,Chuck Russell,cichuck,"Harrisburg, PA",Founder Collective Intelligence #TheFutureOfW...,9351,7702,601,4516,67874,Tue Mar 03 15:05:32 +0000 2009
1,1337464027719499777,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",Global organization to educate and advocate fo...,1041,455,41,858,745,Mon Dec 31 19:43:58 +0000 2018
2,1337464032564027392,Edge Technology News,NewsEdgetech,Online,Technology Social Channel. #EdgeComputing #IoT...,1557,1492,7,7252,7438,Sun Apr 03 16:09:26 +0000 2016
3,1337464037680955392,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV","Founder/CEO Stirling, publishing, emergingtech...",499227,373306,11631,16754,239834,Wed Jun 11 03:51:13 +0000 2008
4,1337464049794076672,Suriya Subramanian,SuriyaSubraman,"London, UK",Data driven Change consultant \nhttp://finperf...,4633,3307,102,4989,282295,Sun Apr 23 20:16:59 +0000 2017
...,...,...,...,...,...,...,...,...,...,...,...
10779,1338932465193390083,Gabbas,GabbasOfficial,United States,,122,516,7,5154,1585,Tue Apr 14 17:57:54 +0000 2020
10780,1338932468779536385,OneLMedia™,OneLMediaNow,United States,Founder |Creating Healthy Solutions For Entrep...,4026,2043,889,14193,78764,Wed Jul 15 00:31:18 +0000 2009
10781,1338932477088423941,Blogged_Environment,blogged_env,,Alternative perspectives and blogs from the Bu...,642,496,48,324,657,Tue Sep 15 16:46:26 +0000 2015
10783,1338932488945737728,Fusion Winter 🛰️,fusionwinter,Globally Monitored Data Fusion,A systems scientist's pertinent daily scan of ...,363,474,2,1271,19240,Sun Jun 16 18:17:33 +0000 2019


In [291]:
duplicates = clean_user_df[clean_user_df.duplicated('id_str')]
duplicates

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at


### Drop any None values from location column before we call google API for lat and lng values

In [292]:
clean_user_df=clean_user_df.dropna(subset=['location'])
clean_user_df

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at
0,1337464014419480576,Chuck Russell,cichuck,"Harrisburg, PA",Founder Collective Intelligence #TheFutureOfW...,9351,7702,601,4516,67874,Tue Mar 03 15:05:32 +0000 2009
1,1337464027719499777,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",Global organization to educate and advocate fo...,1041,455,41,858,745,Mon Dec 31 19:43:58 +0000 2018
2,1337464032564027392,Edge Technology News,NewsEdgetech,Online,Technology Social Channel. #EdgeComputing #IoT...,1557,1492,7,7252,7438,Sun Apr 03 16:09:26 +0000 2016
3,1337464037680955392,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV","Founder/CEO Stirling, publishing, emergingtech...",499227,373306,11631,16754,239834,Wed Jun 11 03:51:13 +0000 2008
4,1337464049794076672,Suriya Subramanian,SuriyaSubraman,"London, UK",Data driven Change consultant \nhttp://finperf...,4633,3307,102,4989,282295,Sun Apr 23 20:16:59 +0000 2017
...,...,...,...,...,...,...,...,...,...,...,...
10771,1338932428140916737,GFOSS,gfoss_en,"Athens, Greece",The English account of GFOSS – Open Technologi...,554,539,75,6244,6661,Wed Oct 28 21:11:32 +0000 2015
10779,1338932465193390083,Gabbas,GabbasOfficial,United States,,122,516,7,5154,1585,Tue Apr 14 17:57:54 +0000 2020
10780,1338932468779536385,OneLMedia™,OneLMediaNow,United States,Founder |Creating Healthy Solutions For Entrep...,4026,2043,889,14193,78764,Wed Jul 15 00:31:18 +0000 2009
10783,1338932488945737728,Fusion Winter 🛰️,fusionwinter,Globally Monitored Data Fusion,A systems scientist's pertinent daily scan of ...,363,474,2,1271,19240,Sun Jun 16 18:17:33 +0000 2019


In [293]:
clean_user_df.loc[clean_user_df['name'] == 'GusCherubina' ]

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at
702,1337467623634767872,GusCherubina,guscherubina,Brasil,DITADURA NUNCA MAIS,498,1239,8,70699,78164,Mon Jul 13 12:03:53 +0000 2009


In [294]:
# Merge new_df clean_user_df
combined_df = clean_user_df.merge(new_df,on='id_str')
combined_df.columns

Index(['id_str', 'name', 'screen_name', 'location', 'description',
       'followers_count', 'friends_count', 'listed_count', 'favourites_count',
       'statuses_count', 'created_at', 'text', 'user', 'lang'],
      dtype='object')

In [295]:
combined_df=combined_df.drop(columns=['user'])

In [296]:
combined_df.loc[combined_df['name'] == 'GusCherubina' ]

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at,text,lang
264,1337467623634767872,GusCherubina,guscherubina,Brasil,DITADURA NUNCA MAIS,498,1239,8,70699,78164,Mon Jul 13 12:03:53 +0000 2009,RT @bigdata: 1/ In this episode of #TheDataExc...,en


In [297]:
combined_df

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at,text,lang
0,1337464014419480576,Chuck Russell,cichuck,"Harrisburg, PA",Founder Collective Intelligence #TheFutureOfW...,9351,7702,601,4516,67874,Tue Mar 03 15:05:32 +0000 2009,Interesting... Machine Learning and AI - What ...,en
1,1337464027719499777,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",Global organization to educate and advocate fo...,1041,455,41,858,745,Mon Dec 31 19:43:58 +0000 2018,Take a look at these open positions in applied...,en
2,1337464032564027392,Edge Technology News,NewsEdgetech,Online,Technology Social Channel. #EdgeComputing #IoT...,1557,1492,7,7252,7438,Sun Apr 03 16:09:26 +0000 2016,RT @Xbond49: Honored &amp; humbled to be in th...,en
3,1337464037680955392,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV","Founder/CEO Stirling, publishing, emergingtech...",499227,373306,11631,16754,239834,Wed Jun 11 03:51:13 +0000 2008,@sciencebase add #IoT data/log with current lo...,en
4,1337464049794076672,Suriya Subramanian,SuriyaSubraman,"London, UK",Data driven Change consultant \nhttp://finperf...,4633,3307,102,4989,282295,Sun Apr 23 20:16:59 +0000 2017,iShares Robotics And Artificial Intelligence M...,en
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3205,1338932428140916737,GFOSS,gfoss_en,"Athens, Greece",The English account of GFOSS – Open Technologi...,554,539,75,6244,6661,Wed Oct 28 21:11:32 +0000 2015,RT @pitres: .@OECD 60th Anniversary continues ...,en
3206,1338932465193390083,Gabbas,GabbasOfficial,United States,,122,516,7,5154,1585,Tue Apr 14 17:57:54 +0000 2020,RT @NagatoDharma: 📜 How $Ocean Protocol will U...,en
3207,1338932468779536385,OneLMedia™,OneLMediaNow,United States,Founder |Creating Healthy Solutions For Entrep...,4026,2043,889,14193,78764,Wed Jul 15 00:31:18 +0000 2009,How #AI is Changing the Way We Optimize https:...,en
3208,1338932488945737728,Fusion Winter 🛰️,fusionwinter,Globally Monitored Data Fusion,A systems scientist's pertinent daily scan of ...,363,474,2,1271,19240,Sun Jun 16 18:17:33 +0000 2019,RT @AI_TechNews: EU human rights agency issues...,en


### Run below code to remove any space between city and state value to build proper google URL 

In [298]:
user_locations = pd.DataFrame(combined_df['location'].str.replace(' ',''))
user_locations['id_str']=pd.DataFrame(combined_df['id_str'])
user_locations

Unnamed: 0,location,id_str
0,"Harrisburg,PA",1337464014419480576
1,"Baltimore,MD",1337464027719499777
2,Online,1337464032564027392
3,"LasVegas,NV",1337464037680955392
4,"London,UK",1337464049794076672
...,...,...
3205,"Athens,Greece",1338932428140916737
3206,UnitedStates,1338932465193390083
3207,UnitedStates,1338932468779536385
3208,GloballyMonitoredDataFusion,1338932488945737728


#### check if there is any duplicate rows with same id's

In [300]:
user_locations[user_locations.duplicated()]

Unnamed: 0,location,id_str


In [309]:
from config import gkey
import requests
lat=[]
lng=[]
id_str=[]
for index,location in user_locations.iterrows():
    
    #Build and end point URL
    target_url = ('https://maps.googleapis.com/maps/api/geocode/json?'
    'address={0}&key={1}').format(location['location'], gkey)
    
    print(target_url)
    id_str.append(location['id_str'])
    # Run a request to endpoint and convert result to json
    geo_data = requests.get(target_url).json()
    
    try:
        # Extract latitude and longitude
        lat.append(geo_data["results"][0]["geometry"]["location"]["lat"])
        lng.append(geo_data["results"][0]["geometry"]["location"]["lng"])
    except:
        print(location)
        pass

https://maps.googleapis.com/maps/api/geocode/json?address=Harrisburg,PA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Baltimore,MD&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Online&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=LasVegas,NV&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=London,UK&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Wesupportworldwide&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=UNITEDSTATESOFEUROPE&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=JustwereIhavetobe.&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location     JustwereIhavetobe.
id_str      1337464060787429376
Name:

https://maps.googleapis.com/maps/api/geocode/json?address=HlavníměstoPraha&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=RedwoodCity,CA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Seattle,WA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=DelftNL&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Florida,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Cambridge,England&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Austin,TX&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=NewDelhi&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Paris,France&key=AIzaSyAuQ

location            NewYork#NYC
id_str      1337465620603019266
Name: 137, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Athens&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=FranceFinistèreQuimper&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Montreal&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=GenovaItaly&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Wirral&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Cary,NC&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=ꎮ̨̤̫͈̊͂̑ꎯ͍̠̳̗̠̫͊ͮ̋ͯ͜͞ͅ&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    ꎮ̨̤̫͈̊͂̑ꎯ͍̠̳̗̠̫͊ͮ̋ͯ͜͞ͅ
id_str         1337465713875935232
Name: 144, dtype: object
https://maps.

https://maps.googleapis.com/maps/api/geocode/json?address=Paris,France&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=LON|NYC|HKG|SYD|SIN&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=JordanMN&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Richland,WA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=México&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=España&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=WORLDCITIZEN💙DM-FREEZONE&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Redmond,Washington&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Alameda,CA&key

https://maps.googleapis.com/maps/api/geocode/json?address=GJ357d&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location                 GJ357d
id_str      1337467798948368392
Name: 275, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Global&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=SriLanka&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Pune&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=MiddleoftheUS&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Louisville,KY&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=SãoPaulo&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=天の川&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.co

https://maps.googleapis.com/maps/api/geocode/json?address=N/A&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=London,England&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Dallas,TX&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=🇻🇦🇲🇽🇺🇸🇪🇪🇪🇺&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location             🇻🇦🇲🇽🇺🇸🇪🇪🇪🇺
id_str      1338641182033911808
Name: 346, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=ROMANIA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Worldwide&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Ukraine&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=LaPaz,BCS&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.google

https://maps.googleapis.com/maps/api/geocode/json?address=Decentralized.Green&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    Decentralized.Green
id_str      1338642468187885568
Name: 412, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Rotterdam,TheNetherlands&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=California,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Florida,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=SanFrancisco&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Cleveland,Ohio&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Sydney,NewSouthWales&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Paris&key=AIzaSyAu

https://maps.googleapis.com/maps/api/geocode/json?address=Mumbai&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=#everythingrelatestoeverything&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    #everythingrelatestoeverything
id_str                 1338644562013810689
Name: 482, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Paris,France&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=SanJose,CA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Missouri,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Saint-Hyacinthe,Québec&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Tennessee,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Riode

https://maps.googleapis.com/maps/api/geocode/json?address=ValladolidEspaña&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=/etc/tmpfiles.d/trab.4u&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    /etc/tmpfiles.d/trab.4u
id_str          1338646940943314949
Name: 552, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=日本東京&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Lakeway,TX&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=LasVegas,NV&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Napoli,Campania&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Chicago,IL,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Rajkot,India&key=AIzaSyAuQLcV7V

https://maps.googleapis.com/maps/api/geocode/json?address=London&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Campinas,SãoPaulo&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Sarnia,Ontario&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=127.0.0.1&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location              127.0.0.1
id_str      1338649069707792384
Name: 624, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Ireland&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=オフィシャルサイト→&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location             オフィシャルサイト→
id_str      1338649160103260160
Name: 626, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=UCBerkeley&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/

https://maps.googleapis.com/maps/api/geocode/json?address=LosAngeles&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Ottawa,Canada&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Bombay-India&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=RealHighQualityTraffic👉&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    RealHighQualityTraffic👉
id_str          1338650783852732417
Name: 690, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=東京都港区芝3-15-14&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=London,UK&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Stoke-on-Trent&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Australia&key=AIzaSyAuQLcV7VJE

https://maps.googleapis.com/maps/api/geocode/json?address=Florida,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=埼玉県さいたま市&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Chattanooga,TN&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Switzerland&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Massachusetts&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Houston&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Floripa-SC&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Rancagua&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Stockholm&key=AIzaSyAuQLcV7VJEMolYoE

https://maps.googleapis.com/maps/api/geocode/json?address=Bruselas,Bélgica&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=SEAPORT1A&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Coimbatore&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Grenoble,France&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Fremont,CA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=LongIsland,NY&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=PaloAlto,CA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Prague,CzechRepublic&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Firenze,Toscan

https://maps.googleapis.com/maps/api/geocode/json?address=Paris🇫🇷🌎.TweetsFR/EN/ES&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=UnitedStates&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=NYC&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=LocationsinUSA&Philippines&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Gaia&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=ParisUE🌎TweetsFR/EN/ES&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Guelph&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Paris,Ile-de-France

https://maps.googleapis.com/maps/api/geocode/json?address=USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Earth&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Germany&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=California,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=NewYork,NY&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Carrollton,TX&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Edinburgh,Scotland&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=IslamicRepublicofIran&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
https://maps.googleapis.com/maps/api/geocode/json?address=Worldwide&key=AIzaSyAuQLcV7

location            NewYork,USA
id_str      1338873301217800194
Name: 1016, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=California,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location         California,USA
id_str      1338873304635957251
Name: 1017, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Europe,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location             Europe,USA
id_str      1338873328908570627
Name: 1018, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=UnitedStates&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location           UnitedStates
id_str      1338873348940509185
Name: 1019, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Dubai,UnitedArabEmirates&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    Dubai,UnitedArabEmirates
id_str           1338873355873738753
Name: 1020, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Greece&key

https://maps.googleapis.com/maps/api/geocode/json?address=Akure,Nigeria&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location          Akure,Nigeria
id_str      1338873883965988871
Name: 1059, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=FolsomCa.&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location              FolsomCa.
id_str      1338873921496440833
Name: 1060, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Australia&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location              Australia
id_str      1338873944502226944
Name: 1061, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Atlanta,GA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location             Atlanta,GA
id_str      1338873982385344515
Name: 1062, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=98001&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location                  98001
id_str      1338873997589499905
Name: 1063, dty

location                 Mexico
id_str      1338874466500108288
Name: 1099, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Pittsburgh|WinstonSalem&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    Pittsburgh|WinstonSalem
id_str          1338874469323071490
Name: 1100, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Birmingham,England&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location     Birmingham,England
id_str      1338874471860621314
Name: 1101, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Bangalore,India&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location        Bangalore,India
id_str      1338874475731828737
Name: 1102, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=ABCanada&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location               ABCanada
id_str      1338874529838366720
Name: 1103, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Gent,Bel

location    withhostKevinCraine
id_str      1338875017568923658
Name: 1140, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=AhmadabadCity,India&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    AhmadabadCity,India
id_str      1338875025185677313
Name: 1141, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=California,USA&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location         California,USA
id_str      1338875090998456320
Name: 1142, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=Boulder,CO&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location             Boulder,CO
id_str      1338875103921270785
Name: 1143, dtype: object
https://maps.googleapis.com/maps/api/geocode/json?address=www.facebook.com/YarntonGreen&key=AIzaSyAuQLcV7VJEMolYoEVym53T8m6B86UOKaI
location    www.facebook.com/YarntonGreen
id_str                1338875122208415745
Name: 1144, dtype: object
https://maps.googleapis.com/maps/api/geocode/j

KeyboardInterrupt: 

#### create a dataframe with lat & lng

In [317]:
lat_lng_df=pd.DataFrame()
lat_lng_df['id_str']=[]
lat_lng_df['lat']=[]
lat_lng_df['lng']=[]
lat_lng_df['id_str']=id_str
lat_lng_df['lat']=pd.DataFrame(lat)
lat_lng_df['lng']=pd.DataFrame(lng)

In [344]:
lat_lng_df

Unnamed: 0,id_str,lat,lng
0,1337464014419480576,40.273191,-76.886701
1,1337464027719499777,39.290385,-76.612189
2,1337464032564027392,36.416750,-94.222944
3,1337464037680955392,36.169941,-115.139830
4,1337464049794076672,51.507351,-0.127758
...,...,...,...
1143,1338875103921270785,,
1144,1338875122208415745,,
1145,1338875127904280576,,
1146,1338875150993862658,,


### Merger lat lng df with combined df so we have one final dataframe with original df,user df and latlng df

In [319]:
final_df=combined_df.merge(lat_lng_df, on='id_str')
final_df

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at,text,lang,lat,lng
0,1337464014419480576,Chuck Russell,cichuck,"Harrisburg, PA",Founder Collective Intelligence #TheFutureOfW...,9351,7702,601,4516,67874,Tue Mar 03 15:05:32 +0000 2009,Interesting... Machine Learning and AI - What ...,en,40.273191,-76.886701
1,1337464027719499777,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",Global organization to educate and advocate fo...,1041,455,41,858,745,Mon Dec 31 19:43:58 +0000 2018,Take a look at these open positions in applied...,en,39.290385,-76.612189
2,1337464032564027392,Edge Technology News,NewsEdgetech,Online,Technology Social Channel. #EdgeComputing #IoT...,1557,1492,7,7252,7438,Sun Apr 03 16:09:26 +0000 2016,RT @Xbond49: Honored &amp; humbled to be in th...,en,36.416750,-94.222944
3,1337464037680955392,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV","Founder/CEO Stirling, publishing, emergingtech...",499227,373306,11631,16754,239834,Wed Jun 11 03:51:13 +0000 2008,@sciencebase add #IoT data/log with current lo...,en,36.169941,-115.139830
4,1337464049794076672,Suriya Subramanian,SuriyaSubraman,"London, UK",Data driven Change consultant \nhttp://finperf...,4633,3307,102,4989,282295,Sun Apr 23 20:16:59 +0000 2017,iShares Robotics And Artificial Intelligence M...,en,51.507351,-0.127758
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1143,1338875103921270785,Tim Rohde,timrohde,"Boulder, CO","Boulder (home), social media, Burning Man, sta...",799,1867,85,77,9078,Tue Aug 25 22:09:41 +0000 2009,"More #ai: Wireless, ultra-thin and battery-fre...",en,,
1144,1338875122208415745,Yarnton Green,YarntonGreen,www.facebook.com/YarntonGreen,"Yarnton Green Residents' Association (YGRA), C...",300,384,17,11,7287,Sun Mar 13 10:13:41 +0000 2011,The latest The Oxford &amp; Oxfordshire Observ...,en,,
1145,1338875127904280576,Jan Barbosa 🐝,JBarbosaPR,"Puerto Rico, USA",Brand Ambassador at @beBee Inc. | @Onalytica 2...,14688,9016,2058,75815,60801,Mon Apr 20 23:40:44 +0000 2015,RT @ipfconline1: Top 5 Sources For #Analytics ...,en,,
1146,1338875150993862658,Jon Shy,ubelievesG,Morocco,Legislation Education Coaching Skills Industry...,223,1215,1,72,9174,Sat Sep 05 12:22:33 +0000 2020,RT @_smartcity_: The 5 Major Sectors in #Smart...,en,,


In [321]:
final_df=final_df.dropna(subset=['lat', 'lng'])

In [322]:
final_df

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at,text,lang,lat,lng
0,1337464014419480576,Chuck Russell,cichuck,"Harrisburg, PA",Founder Collective Intelligence #TheFutureOfW...,9351,7702,601,4516,67874,Tue Mar 03 15:05:32 +0000 2009,Interesting... Machine Learning and AI - What ...,en,40.273191,-76.886701
1,1337464027719499777,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",Global organization to educate and advocate fo...,1041,455,41,858,745,Mon Dec 31 19:43:58 +0000 2018,Take a look at these open positions in applied...,en,39.290385,-76.612189
2,1337464032564027392,Edge Technology News,NewsEdgetech,Online,Technology Social Channel. #EdgeComputing #IoT...,1557,1492,7,7252,7438,Sun Apr 03 16:09:26 +0000 2016,RT @Xbond49: Honored &amp; humbled to be in th...,en,36.416750,-94.222944
3,1337464037680955392,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV","Founder/CEO Stirling, publishing, emergingtech...",499227,373306,11631,16754,239834,Wed Jun 11 03:51:13 +0000 2008,@sciencebase add #IoT data/log with current lo...,en,36.169941,-115.139830
4,1337464049794076672,Suriya Subramanian,SuriyaSubraman,"London, UK",Data driven Change consultant \nhttp://finperf...,4633,3307,102,4989,282295,Sun Apr 23 20:16:59 +0000 2017,iShares Robotics And Artificial Intelligence M...,en,51.507351,-0.127758
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
944,1338842645184077826,RayWitselHA,RayWitselHA,"Schiedam, Nederland",Het slimme huis van @Ray_Witsel en @Ana89deW,38,8,1,0,3154,Mon Jun 10 21:17:25 +0000 2019,0 @ring doorbell presses occurred in the last ...,en,27.664827,-81.515754
945,1338842667606687746,Richie Zhang,RichieZFX,Singapore,Full time #Trader #Daytrader and #AI advocate....,755,217,14,3179,3734,Mon Feb 05 14:00:59 +0000 2018,$XAUUSD $GOLD - if price moves exactly as what...,en,39.074208,21.824312
946,1338842683473719297,"Phil Wilmarth (one of ""those"" doctors...)",pwilmarth,Portland OR,"Proteomics data analysis: shotgun quant, noteb...",921,509,10,8550,3944,Thu Jun 23 16:53:38 +0000 2011,RT @wmlandau: #rstats {distill} is so elegant ...,en,40.712775,-74.005973
947,1338842700452421636,Matt Roach,DrMattRoach,Swansea,Machine Learning - Data Science - Human-DataFo...,1084,926,372,416,8654,Thu Dec 17 14:39:48 +0000 2009,The latest #MachineLearning #BigData Research!...,en,55.378051,-3.435973


In [323]:
final_df=final_df.rename(columns={'created_at_y': "account_creation_date", 'lang': 'language'})

In [324]:
final_df.head()

Unnamed: 0,id_str,name,screen_name,location,description,followers_count,friends_count,listed_count,favourites_count,statuses_count,created_at,text,language,lat,lng
0,1337464014419480576,Chuck Russell,cichuck,"Harrisburg, PA",Founder Collective Intelligence #TheFutureOfW...,9351,7702,601,4516,67874,Tue Mar 03 15:05:32 +0000 2009,Interesting... Machine Learning and AI - What ...,en,40.273191,-76.886701
1,1337464027719499777,Alliance for Artificial Intelligence in Health...,theaaih,"Baltimore, MD",Global organization to educate and advocate fo...,1041,455,41,858,745,Mon Dec 31 19:43:58 +0000 2018,Take a look at these open positions in applied...,en,39.290385,-76.612189
2,1337464032564027392,Edge Technology News,NewsEdgetech,Online,Technology Social Channel. #EdgeComputing #IoT...,1557,1492,7,7252,7438,Sun Apr 03 16:09:26 +0000 2016,RT @Xbond49: Honored &amp; humbled to be in th...,en,36.41675,-94.222944
3,1337464037680955392,Warren Whitlock,WarrenWhitlock,"Las Vegas, NV","Founder/CEO Stirling, publishing, emergingtech...",499227,373306,11631,16754,239834,Wed Jun 11 03:51:13 +0000 2008,@sciencebase add #IoT data/log with current lo...,en,36.169941,-115.13983
4,1337464049794076672,Suriya Subramanian,SuriyaSubraman,"London, UK",Data driven Change consultant \nhttp://finperf...,4633,3307,102,4989,282295,Sun Apr 23 20:16:59 +0000 2017,iShares Robotics And Artificial Intelligence M...,en,51.507351,-0.127758


# Load data back into database

In [326]:
db.tweet_analysis.drop()

In [327]:
#connect to mongo
MONGO_HOST = 'mongodb://localhost/twitteranalysis'
client = MongoClient(MONGO_HOST)
# use twitteranalysis database. if it doesn't exist, it will be created
db= client['twitterdb']
#collection name
twitter_collection = db.tweet_analysis

In [328]:
#insert data into targeted database
final_df.reset_index(inplace=True)
data_dict = final_df.to_dict('records')
db.tweet_analysis.insert_many(data_dict)

<pymongo.results.InsertManyResult at 0x7f9ce64dc600>

## Additional Analysis

In [329]:
import gmaps
from matplotlib.cm import viridis
from matplotlib.colors import to_hex

In [331]:
df = pd.DataFrame({'name':final_df['name'],'text':final_df['text']})
#df.fillna(0)
user_location = final_df.to_dict('records')

info_box_template="""<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>Tweet</dt><dd>{text}</dd>
</dl>
"""
latlng = final_df[['lat', 'lng']]
location_info =[info_box_template.format(**location) for location in user_location]
marker_layer = gmaps.marker_layer(latlng, info_box_content=location_info)
fig = gmaps.figure()
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(height='420px'))