In [6]:
import IP2Location
from pymongo import MongoClient, UpdateOne
from tqdm import tqdm

# Location of the IP2Location BIN database used for lookups.
# Replace this with the path to your own BIN file.
BIN_DATABASE_PATH = "/Users/dai/Documents/Data_Engineer/DE_K9_Final/IP2Location-Python-master/data/IP2LOCATION-LITE-DB11.BIN"

# MongoDB connection string; replace with your own if the server is not local.
MONGO_URI = 'mongodb://localhost:27017/'

# Create the lookup object first, then attach the BIN database to it.
ip2location = IP2Location.IP2Location()
ip2location.open(BIN_DATABASE_PATH)

# The documents to enrich live in the "summary" collection of the "glamira" database.
client = MongoClient(MONGO_URI)
db = client['glamira']
collection = db['summary']

# Build the geolocation fields that get attached to a document for one IP
def enrich_ip_data(ip):
    """Look up *ip* in the IP2Location database and return its geo fields.

    The lookup goes through the module-level ``ip2location`` object.

    Args:
        ip: The IP address to look up, passed unchanged to
            ``ip2location.get_all``.

    Returns:
        A dict with the keys ``country_code``, ``country_name``,
        ``region_name``, ``city_name``, ``latitude``, ``longitude``,
        ``zipcode`` and ``timezone``, each holding the corresponding
        attribute of the record returned by the lookup.
    """
    geo_record = ip2location.get_all(ip)

    # (output key, attribute on the lookup record), in output order.
    field_sources = (
        ("country_code", "country_short"),
        ("country_name", "country_long"),
        ("region_name", "region"),
        ("city_name", "city"),
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("zipcode", "zipcode"),
        ("timezone", "timezone"),
    )
    return {
        field: getattr(geo_record, attribute)
        for field, attribute in field_sources
    }

def _flush_updates(target_collection, operations):
    """Send the queued update operations to MongoDB with one bulk_write call.

    Args:
        target_collection: The pymongo collection to write to.
        operations: A list of ``UpdateOne`` operations; an empty list is a
            no-op (``bulk_write`` rejects empty request lists).
    """
    if operations:
        # The updates touch different documents and do not depend on each
        # other, so the server does not need to apply them in order.
        target_collection.bulk_write(operations, ordered=False)


# Get the total number of documents (used to size the progress bar)
total_documents = collection.count_documents({})

# Create a progress bar and process documents
batch_size = 100  # Number of updates sent per bulk_write; adjust as needed
batch_updates = []
skipped_documents = 0  # Documents that had no usable "ip" value

with tqdm(total=total_documents, desc="Enriching IP Data", unit='doc') as pbar:
    # Only "_id" and "ip" are read below, so project away every other field
    # instead of transferring whole documents.
    for document in collection.find({}, {'ip': 1}):
        ip = document.get('ip')  # Assuming the IP address is stored as the "ip" field

        if ip is None:
            # Skip rather than abort a long run because of one bad document.
            skipped_documents += 1
        else:
            enriched_data = enrich_ip_data(ip)
            # "_id" is used only to select the document; it is not part of
            # the $set payload. No upsert: the document was just read from
            # this collection, so an upsert could only recreate a
            # concurrently deleted document as a stub with geo fields only.
            batch_updates.append(
                UpdateOne({'_id': document['_id']}, {'$set': enriched_data})
            )

            if len(batch_updates) >= batch_size:
                _flush_updates(collection, batch_updates)
                batch_updates = []

        pbar.update(1)  # Update progress bar for each processed document

    # Write whatever is left over from the final, partially filled batch
    _flush_updates(collection, batch_updates)

if skipped_documents:
    print(f"Skipped {skipped_documents} documents without an IP address.")

print("IP data enrichment complete.")


Enriching IP Data: 100%|██████████| 41432473/41432473 [1:41:23<00:00, 6810.71doc/s]  

IP data enrichment complete.



