# In [None]:
import os
import pandas as pd
import requests
import time

# Path to the directory containing the CSV files
base_directory = r'C:\Users\Video Streaming'

# Base URL of the ipinfo.io API; the IP address and "/json" are appended per request.
api_url = "https://ipinfo.io/"
# Access token for ipinfo.io (available from your ipinfo.io account).
# It is read from the IPINFO_TOKEN environment variable when set, so the secret
# does not have to be stored in this file; otherwise replace the placeholder
# '??????????' below with your actual token.
access_token = os.environ.get("IPINFO_TOKEN", "??????????")

# Function to get geolocation info from IP
def get_geolocation_info(ip_address):
    """Look up geolocation details for an IP address via the ipinfo API.

    Args:
        ip_address: The IP address to look up; it is interpolated into the
            request URL.

    Returns:
        A 6-tuple ``(country, region, city, org, latitude, longitude)``.
        A field missing from the response is returned as ``''``. If the
        request or parsing fails for any reason, the error is printed and
        all six values are ``''``.
    """
    try:
        # Send a GET request to the ipinfo API. The timeout keeps one stalled
        # connection from blocking the whole run.
        response = requests.get(
            f"{api_url}{ip_address}/json",
            params={"token": access_token},
            timeout=10,
        )
        # Surface HTTP errors (bad token, rate limit, ...) in the message
        # printed below instead of silently yielding empty fields.
        response.raise_for_status()
        data = response.json()

        # Extract required fields
        country = data.get('country', '')
        region = data.get('region', '')
        city = data.get('city', '')
        org = data.get('org', '')

        # 'loc' is split on ',' and unpacked as (latitude, longitude)
        loc = data.get('loc', '').split(',')
        if len(loc) == 2:
            latitude, longitude = loc
        else:
            latitude, longitude = '', ''

        return country, region, city, org, latitude, longitude

    except Exception as e:
        # Deliberately broad: a lookup is best-effort and must never abort
        # the processing of the remaining IPs.
        print(f"Error fetching data for IP {ip_address}: {e}")
        return '', '', '', '', '', ''

# Function to process a single file
def process_file(file_path):
    """Add geolocation columns to one CSV file and save the result as a new file.

    The file is read with UTF-8, falling back to ISO-8859-1 on a decode
    error. Each distinct non-null value in the 'Source' column is looked up
    with ``get_geolocation_info`` and the result is written to the
    'Country', 'Region', 'City', 'Org', 'Longitude' and 'Latitude' columns of
    every matching row. The output is written next to the input with 'Added'
    appended to the base name (``data.csv`` -> ``dataAdded.csv``).

    Args:
        file_path: Path of the CSV file to process.

    Raises:
        KeyError: If the file has no 'Source' column.
    """
    # Load the dataset with the specified encoding
    try:
        df = pd.read_csv(file_path, encoding='utf-8')  # Default encoding
    except UnicodeDecodeError:
        df = pd.read_csv(file_path, encoding='ISO-8859-1')  # Fallback encoding

    # Order matches the tuple returned by get_geolocation_info
    geo_columns = ['Country', 'Region', 'City', 'Org', 'Latitude', 'Longitude']

    # Add the geolocation columns if they don't exist yet
    for column in geo_columns:
        if column not in df.columns:
            df[column] = ''

    # Unique IP addresses from the 'Source' column. Missing values are
    # dropped: NaN never compares equal to any row, so looking it up would
    # only waste an API request and a rate-limit sleep.
    unique_ips = df['Source'].dropna().unique()

    # Fetch geolocation data for each unique IP and update the dataframe
    for ip in unique_ips:
        country, region, city, isp, latitude, longitude = get_geolocation_info(ip)

        # Compute the row selection once and reuse it for every column
        rows = df['Source'] == ip
        values = (country, region, city, isp, latitude, longitude)
        for column, value in zip(geo_columns, values):
            df.loc[rows, column] = value

        print(f"Processed IP: {ip} -> Country: {country}, Region: {region}, City: {city}, ISP: {isp}, Lat: {latitude}, Lon: {longitude}")

        # Sleep to avoid hitting API rate limits
        time.sleep(1)

    # Generate new file name with 'Added' inserted before the extension
    base_name, extension = os.path.splitext(file_path)
    new_file_name = base_name + 'Added' + extension

    # Save the updated dataframe to a new CSV file
    df.to_csv(new_file_name, index=False)
    print(f"File saved to {new_file_name}")

# Traverse through all folders and subfolders
for foldername, subfolders, filenames in os.walk(base_directory):
    for filename in filenames:
        if not filename.endswith('.csv'):
            continue

        # process_file writes its result as '<name>Added.csv' next to the
        # input. Skip those outputs so that re-running the script does not
        # treat them as inputs (producing '<name>AddedAdded.csv' and
        # repeating every API lookup).
        if os.path.splitext(filename)[0].endswith('Added'):
            print(f"Skipping previously generated file: {os.path.join(foldername, filename)}")
            continue

        file_path = os.path.join(foldername, filename)
        print(f"Processing file: {file_path}")
        process_file(file_path)
