<a href="https://colab.research.google.com/github/atinsinghal97/explore_fb_data/blob/main/data_preprocessing/getting_coordinates_from_ip.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook calls external geolocation APIs to look up the coordinates of the IP addresses found in the user's data, so that those locations can be visualized on a map.

In [None]:
# Installing Python Packages
# NOTE: ONLY NEED TO DO IT ONCE
!pip install ipinfo
!pip install ipdata

In [None]:
import os
import zipfile
import json
from pprint import pprint # pretty print- better visualization of json data

In [None]:
# Getting Dataset
# Downloads the archive from the Google Drive link below and saves it as /tmp/facebook-sampledataset.zip.
# NOTE(review): --no-check-certificate appears to turn off TLS certificate validation for this
# download; confirm the flag is still required.

!wget --no-check-certificate \
    "https://drive.google.com/u/0/uc?id=106gkJ-Eb-sD6O1rXxbR4d2fsw0uezd3b&export=download" \
    -O "/tmp/facebook-sampledataset.zip"

# Unpack the downloaded archive into /tmp.
local_zip = '/tmp/facebook-sampledataset.zip'
# The context manager closes the archive even when extraction raises part-way through.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
  zip_ref.extractall('/tmp')

--2020-12-31 07:30:32--  https://drive.google.com/u/0/uc?id=106gkJ-Eb-sD6O1rXxbR4d2fsw0uezd3b&export=download
Resolving drive.google.com (drive.google.com)... 173.194.69.101, 173.194.69.102, 173.194.69.138, ...
Connecting to drive.google.com (drive.google.com)|173.194.69.101|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0s-a8-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/hlffv7g18bbttipg231usqguv8rccqbc/1609399800000/08981783661760789041/*/106gkJ-Eb-sD6O1rXxbR4d2fsw0uezd3b?e=download [following]
--2020-12-31 07:30:35--  https://doc-0s-a8-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/hlffv7g18bbttipg231usqguv8rccqbc/1609399800000/08981783661760789041/*/106gkJ-Eb-sD6O1rXxbR4d2fsw0uezd3b?e=download
Resolving doc-0s-a8-docs.googleusercontent.com (doc-0s-a8-docs.googleusercontent.com)... 108.177.119.132, 2a00:1450:4013:c00::84
Connecting to doc-0s-a8-docs.googleusercontent.com (d

In [None]:
# Sanity check: count the entries directly inside the extracted dataset folder.
dataset_entries = os.listdir('/tmp/facebook-sampledataset')
print(len(dataset_entries))

# Expected Output:
# 19

19


In [None]:
# Function to get coordinates from IP [Limited Usage- 50,000/month]
# RUN EITHER THIS OR THE CELL BELOW

import ipinfo

# Prefer a token supplied through the IPINFO_TOKEN environment variable; the literal is only a
# fallback so the notebook keeps running unchanged when the variable is not set.
# NOTE(review): the fallback token is committed with the notebook; rotate it and rely on the env var.
access_token = os.environ.get('IPINFO_TOKEN', '3dcbac5b8a715e')
# Handler object used by get_coordinates_from_ip for every lookup.
handler = ipinfo.getHandler(access_token)

def get_coordinates_from_ip(ip_address):
  """Look up `ip_address` through the module-level ipinfo `handler` and return its `loc` value.

  ip_address: the IP address to look up, e.g. '216.239.36.21'
  Returns whatever the handler reports as `loc`, e.g. a string such as '37.4056,-122.0775'.
  """
  return handler.getDetails(ip_address).loc

# Quick manual check of the lookup with a sample IPv4 address.
sample_ip = '216.239.36.21'
get_coordinates_from_ip(sample_ip)
# get_coordinates_from_ip('2a02:c7d:4204:ee00:ecd6:19ba:f1c7:1190')

'38.0088,-122.1175'

In [None]:
# Function to get coordinates from IP- Alternative [Limited Usage- 1,500/day]
# RUN EITHER THIS OR THE CELL ABOVE

from ipdata import ipdata

# Prefer a key supplied through the IPDATA_API_KEY environment variable; the literal is only a
# fallback so the notebook keeps running unchanged when the variable is not set.
# NOTE(review): the fallback key is committed with the notebook; rotate it and rely on the env var.
access_token = os.environ.get('IPDATA_API_KEY', 'f2f889b131f0d4bf412a7e79e6f8c41ff61de8ebb71ec58bbc8850aa')
# NOTE: the client instance is bound to the name `ipdata`, replacing the module imported above;
# get_coordinates_from_ip below looks the client up under this name, so it is kept as is.
ipdata = ipdata.IPData(access_token)

def get_coordinates_from_ip(ip_address):
  """Look up `ip_address` with the module-level `ipdata` client and return 'latitude,longitude'.

  ip_address: the IP address to look up, e.g. '216.239.36.21'
  Returns the response's 'latitude' and 'longitude' values, each converted with str() and
  joined by a comma, e.g. '37.3891,-122.0866'.
  """
  result = ipdata.lookup(ip_address)
  return ','.join(str(result[axis]) for axis in ('latitude', 'longitude'))

# Quick manual check of the lookup with a sample IPv4 address.
sample_ip = '216.239.36.21'
get_coordinates_from_ip(sample_ip)
# get_coordinates_from_ip('2a02:c7d:4204:ee00:ecd6:19ba:f1c7:1190')

'37.3891,-122.0866'

In [None]:
# Function to update global ip-coordinate map

# ip_set = set()
# Global cache: IP address string -> value returned by get_coordinates_from_ip for it.
ip_coordinate_map = {}

def update_ip_coordinate_map_with(json_list, json_key):
  """Resolve every IP in `json_list` that is not yet in the global ip_coordinate_map.

  json_list: list of JSON objects
  json_key: identifier for the ip inside each JSON object
  IPs already present in the map are skipped, so each one is looked up at most once.
  """
  for ip in (entry[json_key] for entry in json_list):
    if ip in ip_coordinate_map:
      continue
    ip_coordinate_map[ip] = get_coordinates_from_ip(ip)
      

In [None]:
# Function to read JSON data from files and update global ip-coordinate map

def load_data_from (file_path, json_list, json_key):
  """Read one JSON file and register the IPs it contains in the global ip-coordinate map.

  file_path: path to the JSON file
  json_list: top-level key in the file whose value is the list of JSON objects
  json_key: identifier for the ip inside each JSON object
  Raises KeyError when `json_list` is not a key of the loaded data, and whatever json.load
  raises when the file is not valid JSON.
  """
  # `with` closes the handle even when json.load raises on malformed input.
  with open(file_path, encoding='utf-8') as file:
    file_data = json.load(file)

  update_ip_coordinate_map_with(file_data[json_list], json_key)

# The IP coordinates are kept in one global map so that each address is looked up only once, which limits external API calls and reduces overhead.
# The map could also be saved to a local file if storage space isn't a constraint.

In [None]:
# List of all the files which have IP information

# Each row: [path to the JSON file, top-level key holding the entries, key of the IP inside an entry]
ip_file_list = [list(row) for row in (
    ("/tmp/facebook-sampledataset/security_and_login_information/account_activity.json", "account_activity", "ip_address"),
    ("/tmp/facebook-sampledataset/security_and_login_information/used_ip_addresses.json", "used_ip_address", "ip"),
    ("/tmp/facebook-sampledataset/security_and_login_information/where_you're_logged_in.json", "active_sessions", "ip_address"),
    ("/tmp/facebook-sampledataset/security_and_login_information/logins_and_logouts.json", "account_accesses", "ip_address"),
    ("/tmp/facebook-sampledataset/security_and_login_information/authorized_logins.json", "recognized_devices", "ip_address"),
)]

# Function call to create a master ip-coordinate map

# Load every listed file in turn; the printed number is the size of the map after each file.
for source_path, list_key, ip_key in ip_file_list:
  load_data_from(file_path = source_path, json_list = list_key, json_key = ip_key)
  print(len(ip_coordinate_map))

188
199
204
205
207


In [None]:
# Show only the first few entries; displaying the whole map puts every looked-up IP into the saved output.
dict(list(ip_coordinate_map.items())[:10])

{'128.40.176.132': '51.5085,-0.1257',
 '128.41.38.80': '51.5085,-0.1257',
 '128.41.9.30': '51.5085,-0.1257',
 '138.38.238.132': '51.3751,-2.3617',
 '144.82.8.85': '51.5085,-0.1257',
 '148.252.128.52': '51.5085,-0.1257',
 '148.252.129.114': '51.5085,-0.1257',
 '151.230.215.80': '54.9911,-1.5340',
 '151.230.75.219': '53.7628,-2.7045',
 '167.98.125.201': '51.5085,-0.1257',
 '170.194.32.12': '51.5085,-0.1257',
 '170.194.32.42': '51.5085,-0.1257',
 '170.194.32.58': '51.5085,-0.1257',
 '212.49.215.19': '51.5580,-1.7812',
 '213.205.240.132': '51.5085,-0.1257',
 '2a02:0c7d:4204:ee00:ecd6:19ba:f1c7:1190': '54.5968,-5.9254',
 '2a02:c7d:4204:ee00:ecd6:19ba:f1c7:1190': '54.5968,-5.9254',
 '5.148.125.98': '51.5085,-0.1257',
 '77.218.246.37': '59.3294,18.0687',
 '77.81.11.135': '44.6500,24.2667',
 '79.173.170.234': '51.5085,-0.1257',
 '81.153.195.59': '53.4809,-2.2374',
 '81.153.199.196': '51.8657,-2.2431',
 '81.154.57.249': '52.1889,0.9977',
 '81.156.166.92': '54.9981,-7.3093',
 '81.157.206.177': '

In [None]:
# Standard function to write JSON data to file

def write_json (data, file_name):
  """Serialize `data` as JSON with a 4-space indent and write it to `file_name`.

  data: JSON-serializable object
  file_name: path of the file to create or overwrite
  Raises TypeError when `data` contains a value json cannot serialize; in that case the
  existing file is left untouched.
  """
  # Encode before opening: mode 'w' truncates the file immediately, so a serialization
  # error raised after the open would leave the target empty or half-written.
  payload = json.dumps(data, indent=4)
  with open(file_name, 'w') as f:
    f.write(payload)

In [None]:
# Function to add coordinates data to the existing file

def update_files(file_path, json_header, json_key):
  """Add a 'coordinate' object to every entry of one JSON file and rewrite the file in place.

  file_path: path to the JSON file; it is read and then overwritten
  json_header: top-level key in the file whose value is the list of entries
  json_key: identifier for the ip inside each entry
  Each entry gains {'coordinate': {'latitude': ..., 'longitude': ...}}, with both values kept
  as the strings found in the global ip_coordinate_map.
  Raises KeyError when an entry's IP is not in ip_coordinate_map, and ValueError when a stored
  coordinate does not split into exactly two comma-separated parts.
  """
  # Read everything and close the handle before the same path is reopened for writing.
  with open(file_path, encoding='utf-8') as json_file:
    data = json.load(json_file)

  for entry in data[json_header]:
    lat, lon = ip_coordinate_map[entry[json_key]].split(",")
    entry.update({'coordinate': {'latitude': lat, 'longitude': lon}})

  write_json(data, file_path)
      
# update_files(ip_file_list[2][0], ip_file_list[2][1], ip_file_list[2][2])

In [None]:
# repeatedly calling update_files function for every file in the list

# Rewrite each listed file with the coordinate data added to its entries.
for target_path, header_key, ip_key in ip_file_list:
  update_files(target_path, header_key, ip_key)