In [19]:
import random
import csv
from faker import Faker
from datetime import datetime, timedelta
import bcrypt
from tqdm import tqdm  # Import tqdm for progress bars

# Shared Faker instance for the 'en_IN' locale; generate_user_data and
# generate_gig_data below call it for names, emails, phone numbers and timestamps.
fake = Faker('en_IN')

def hash_password(password, rounds=12):
    """Return a bcrypt hash of ``password`` as a text string.

    Args:
        password: Plain-text password (``str``); it is UTF-8 encoded before hashing.
        rounds: bcrypt work factor forwarded to ``bcrypt.gensalt``. The default
            of 12 is bcrypt's own default, so existing one-argument callers get
            the same behaviour as before. A lower value makes bulk generation
            of throw-away fixture data much faster.

    Returns:
        The hash (salt and cost included) decoded to ``str``.
    """
    # A fresh random salt is generated on every call, so hashing the same
    # password twice yields two different strings.
    salt = bcrypt.gensalt(rounds=rounds)
    return bcrypt.hashpw(password.encode("utf-8"), salt).decode("utf-8")

# Platform constraints data.
# Each row is (platform_id, platform_name, platform_type); the rows are expanded
# into the list-of-dicts shape that generate_gig_data filters by platform_type.
_PLATFORM_ROWS = (
    (1, "Swiggy", "Delivery"),
    (2, "Zomato", "Delivery"),
    (3, "Uber", "Driver"),
    (4, "Ola", "Driver"),
    (5, "Fiverr", "Freelance"),
    (6, "InDrive", "Driver"),
    (7, "Blinkit", "Delivery"),
    (8, "Zepto", "Delivery"),
    (9, "MagicPin", "Delivery"),
)
platforms_data = [
    {"platform_id": pid, "platform_name": pname, "platform_type": ptype}
    for pid, pname, ptype in _PLATFORM_ROWS
]

def generate_user_data(user_id):
    """Build one synthetic user record.

    Args:
        user_id: Identifier stored under the ``"user_id"`` key.

    Returns:
        dict with the user's fake name, e-mail and phone number, a bcrypt hash
        of the literal ``"password123"``, a random latitude/longitude, and
        randomly chosen type, rating, availability, earnings and preferred gig
        type. Key order is the column order later written to CSV.
    """
    # Fields are filled in output order so the sequence of Faker / random draws
    # stays the same from call to call.
    record = {"user_id": user_id}
    record["first_name"] = fake.first_name()
    record["last_name"] = fake.last_name()
    record["email"] = fake.email()
    # Every user shares the same plain-text password; only the salt differs.
    record["password_hash"] = hash_password("password123")
    record["phone_number"] = fake.phone_number()
    record["location_lat"] = random.uniform(18.5, 19.5)
    record["location_lng"] = random.uniform(72.7, 73.0)
    record["user_type"] = random.choice(['Driver', 'Delivery', 'Freelancer'])
    record["rating"] = round(random.uniform(1, 5), 1)
    record["availability_status"] = random.choice(['Available', 'Busy', 'Offline'])
    record["total_earnings"] = round(random.uniform(1000, 100000), 2)
    record["preferred_gig_type"] = random.choice(['Delivery', 'Driving', 'Freelance'])
    return record

def generate_gig_data(gig_id, user_id):
    """Build one synthetic gig record assigned to ``user_id``.

    A gig type is picked at random, then a platform whose ``platform_type``
    matches it is picked from ``platforms_data``, so the stored type and
    platform always agree.

    Args:
        gig_id: Identifier stored under the ``"gig_id"`` key.
        user_id: Identifier of the user the gig belongs to.

    Returns:
        dict describing the gig; ``start_time`` / ``end_time`` are ``datetime``
        objects, with the end 1-5 whole hours after the start.
    """
    chosen_type = random.choice(['Delivery', 'Driver', 'Freelance'])

    # Restrict the platform draw to platforms offering the chosen gig type.
    candidates = []
    for entry in platforms_data:
        if entry["platform_type"] == chosen_type:
            candidates.append(entry)
    chosen_platform = random.choice(candidates)

    began_at = fake.date_time_this_month()
    duration = timedelta(hours=random.randint(1, 5))
    finished_at = began_at + duration

    gig = {
        "gig_id": gig_id,
        "gig_type": chosen_platform["platform_type"],
        "platform": chosen_platform["platform_name"],
    }
    gig["location_lat"] = random.uniform(18.5, 19.5)
    gig["location_lng"] = random.uniform(72.7, 73.0)
    gig["start_time"] = began_at
    gig["end_time"] = finished_at
    gig["earnings"] = round(random.uniform(100, 5000), 2)
    gig["gig_status"] = random.choice(['Completed', 'In-progress', 'Cancelled'])
    gig["demand_score"] = round(random.uniform(0.5, 1.0), 2)
    gig["travel_distance"] = round(random.uniform(1, 15), 1)
    gig["estimated_time_to_complete"] = random.randint(10, 180)
    gig["user_id"] = user_id
    return gig

def save_to_csv(data, filename, fieldnames):
    """Write an iterable of dict records to ``filename`` as UTF-8 CSV.

    Args:
        data: Iterable of dicts, one per output row.
        filename: Destination path; an existing file is overwritten.
        fieldnames: Column names, in the order they should appear. A header row
            with these names is written first.
    """
    # newline="" leaves line endings to the csv module.
    with open(filename, "w", encoding="utf-8", newline="") as csv_file:
        dict_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        for record in data:
            dict_writer.writerow(record)

# Generate data with progress bars.
# The dataset sizes are named once so that the user-id range gigs draw from
# cannot drift out of sync with the number of users actually generated.
NUM_USERS = 1000
NUM_GIGS = 3000

print("Generating user data...")
# Each user record computes a bcrypt hash, which makes this loop far slower per
# item than the gig loop below.
users = [
    generate_user_data(user_id)
    for user_id in tqdm(range(1, NUM_USERS + 1), desc="Users")
]

print("Generating gig data...")
# Every gig is assigned to a uniformly random user id in 1..NUM_USERS.
gigs = [
    generate_gig_data(gig_id, random.randint(1, NUM_USERS))
    for gig_id in tqdm(range(1, NUM_GIGS + 1), desc="Gigs")
]

# Save to CSV; the column order is the key order of the first record.
print("Saving users to CSV...")
save_to_csv(users, "Users.csv", list(users[0].keys()))

print("Saving gigs to CSV...")
save_to_csv(gigs, "Gigs.csv", list(gigs[0].keys()))

print("Data generation complete with constraints!")


Generating user data...


Users:   0%|          | 0/1000 [00:00<?, ?it/s]

Users: 100%|██████████| 1000/1000 [07:16<00:00,  2.29it/s]


Generating gig data...


Gigs: 100%|██████████| 3000/3000 [00:00<00:00, 3629.19it/s]

Saving users to CSV...
Saving gigs to CSV...
Data generation complete with constraints!





In [1]:
import os
import warnings

# SECURITY: the MongoDB password used to be hardcoded in this cell. Treat that
# value as leaked (rotate it) and supply the replacement through the
# MONGODB_PASSWORD environment variable instead of committing it.
username = os.environ.get("MONGODB_USERNAME", "arora707vicky")
password = os.environ.get("MONGODB_PASSWORD", "")
if not password:
    warnings.warn(
        "MONGODB_PASSWORD is not set; the MongoDB connections built from "
        "`password` in later cells are expected to fail authentication."
    )
cluster_url = ''

In [2]:
from urllib.parse import quote_plus

from pymongo import MongoClient

# Build the Atlas URI from the `username` / `password` defined in the
# credentials cell. Both are percent-escaped because characters such as
# '@', ':' or '/' in a credential would otherwise corrupt the URI.
client = MongoClient(
    f"mongodb+srv://{quote_plus(username)}:{quote_plus(password)}"
    "@gigs.7jzwe.mongodb.net/?retryWrites=true&w=majority&appName=gigs"
)
# NOTE(review): later cells rebind `db` to the databases 'gigs', 'Gigs' and
# 'gig'; confirm which database name is actually intended.
db = client.AggregatorDB



In [3]:
# Select the 'gigs' database and its 'Gigs' collection.
# NOTE(review): other cells in this notebook use the database names 'Gigs' and
# 'gig' and the collection name 'gigs'. Collection names are case-sensitive, so
# confirm this is the intended target.
db = client['gigs']
collection = db['Gigs']

# Example query: iterate over every document in the collection and print it.
cursor = collection.find()
for document in cursor:
    print(document)

In [3]:
from urllib.parse import quote_plus

from pymongo import MongoClient

# MongoDB Atlas connection string. The credentials come from the `username` /
# `password` variables and are percent-escaped so reserved characters in them
# cannot corrupt the URI.
uri = (
    f"mongodb+srv://{quote_plus(username)}:{quote_plus(password)}"
    "@gigs.7jzwe.mongodb.net/?retryWrites=true&w=majority&appName=gigs"
)
client = MongoClient(uri)

# Connect to the 'Gigs' database.
db = client['Gigs']

# NOTE(review): the insert cell writes to the lowercase collections 'users' and
# 'gigs', while this cell empties 'Users' and 'Gigs'. Collection names are
# case-sensitive, so confirm these deletes hit the intended collections.

# Delete all data from Users collection
db.Users.delete_many({})

# Delete all data from Gigs collection
db.Gigs.delete_many({})

print("All data cleared from collections.")


All data cleared from collections.


In [7]:
from urllib.parse import quote_plus

import pandas as pd
from pymongo import MongoClient

# MongoDB Atlas connection string. The credentials come from the `username` /
# `password` variables and are percent-escaped so reserved characters in them
# cannot corrupt the URI.
uri = (
    f"mongodb+srv://{quote_plus(username)}:{quote_plus(password)}"
    "@gigs.7jzwe.mongodb.net/?retryWrites=true&w=majority&appName=gigs"
)
client = MongoClient(uri)

# Connect to the 'Gigs' database and the three target collections.
db = client['Gigs']
users_collection = db['users']
gigs_collection = db['gigs']
platforms_collection = db['platforms']

# Read CSV files using pandas.
# NOTE(review): Users.csv and Gigs.csv are written by the generator cell, but no
# visible cell writes Platforms.csv; confirm where that file comes from.
users_df = pd.read_csv('Users.csv')
gigs_df = pd.read_csv('Gigs.csv')
platforms_df = pd.read_csv('Platforms.csv')

# Convert each DataFrame to a list of dictionaries (one per row).
# Note that this rebinds `platforms_data`, replacing the list literal defined
# in the generator cells for the rest of the kernel session.
users_data = users_df.to_dict(orient='records')
gigs_data = gigs_df.to_dict(orient='records')
platforms_data = platforms_df.to_dict(orient='records')

# Insert data into MongoDB collections
users_collection.insert_many(users_data)
gigs_collection.insert_many(gigs_data)
platforms_collection.insert_many(platforms_data)

print("Data inserted successfully!")


Data inserted successfully!


In [9]:
import random
import csv
from faker import Faker
from datetime import datetime, timedelta
import bcrypt
from tqdm import tqdm  # Import tqdm for progress bars

# Faker instance for the 'en_IN' locale.
# NOTE(review): nothing in the visible remainder of this cell references `fake`.
fake = Faker('en_IN')

# Platform constraints data.
# Rows are zipped with the field names below to produce one dict per platform;
# the analytics generation loop reads each entry's "platform_id".
_PLATFORM_FIELDS = ("platform_id", "platform_name", "platform_type")
platforms_data = [
    dict(zip(_PLATFORM_FIELDS, row))
    for row in (
        (1, "Swiggy", "Delivery"),
        (2, "Zomato", "Delivery"),
        (3, "Uber", "Driver"),
        (4, "Ola", "Driver"),
        (5, "Fiverr", "Freelance"),
        (6, "InDrive", "Driver"),
        (7, "Blinkit", "Delivery"),
        (8, "Zepto", "Delivery"),
        (9, "MagicPin", "Delivery"),
    )
]

# Platform analytics generation
def generate_platform_analytics_data(record_id, platform_id):
    """Build one synthetic analytics record for a platform.

    Args:
        record_id: Identifier stored under ``"record_id"``.
        platform_id: Identifier stored under ``"platform_id"``.

    Returns:
        dict with the two identifiers followed by six randomly drawn metrics:
        integer user/gig counts and completion time, and rounded float rates,
        earnings and demand score.
    """
    # Metrics are drawn in output order so the sequence of random calls is fixed.
    metrics = {"record_id": record_id, "platform_id": platform_id}
    metrics["active_users"] = random.randint(500, 5000)
    metrics["completed_gigs"] = random.randint(100, 5000)
    metrics["cancellation_rate"] = round(random.uniform(0, 0.2), 2)
    metrics["average_earnings_per_gig"] = round(random.uniform(50, 5000), 2)
    metrics["average_completion_time"] = random.randint(10, 180)
    metrics["demand_fluctuation_score"] = round(random.uniform(0.5, 1.5), 2)
    return metrics

# Save to CSV
def save_to_csv(data, filename, fieldnames):
    """Write dict rows to a UTF-8 CSV file with a header line.

    Args:
        data: Iterable of dicts, one per row.
        filename: Output path; the file is created or overwritten.
        fieldnames: Ordered column names used for the header and for each row.
    """
    # newline="" leaves line endings to the csv module.
    with open(filename, "w", encoding="utf-8", newline="") as handle:
        rows_out = csv.DictWriter(handle, fieldnames=fieldnames)
        rows_out.writeheader()
        for entry in data:
            rows_out.writerow(entry)

# Generate platform analytics with progress bar.
print("Generating platform analytics data...")
# tqdm wraps the list itself (not the enumerate object) so it can read the
# length and show a real percentage/total; enumerate supplies the 1-based
# record ids.
platform_analytics = [
    generate_platform_analytics_data(record_id, platform_data["platform_id"])
    for record_id, platform_data in enumerate(
        tqdm(platforms_data, desc="Platform Analytics"), start=1
    )
]

# Save platform analytics to CSV; the column order is the key order of the
# first record.
print("Saving platform analytics to CSV...")
save_to_csv(platform_analytics, "PlatformAnalytics.csv", list(platform_analytics[0].keys()))

print("Additional data generation complete!")


Generating platform analytics data...


Platform Analytics: 9it [00:00, ?it/s]

Saving platform analytics to CSV...
Additional data generation complete!





In [7]:
import os
import csv
from urllib.parse import quote_plus

from pymongo import MongoClient

# MongoDB Atlas connection string. The credentials come from the `username` /
# `password` variables and are percent-escaped so reserved characters in them
# cannot corrupt the URI.
uri = (
    f"mongodb+srv://{quote_plus(username)}:{quote_plus(password)}"
    "@gigs.7jzwe.mongodb.net/?retryWrites=true&w=majority&appName=gigs"
)
client = MongoClient(uri)
# NOTE(review): this cell targets the database 'gig', while other cells use
# 'Gigs' and 'gigs'; confirm the intended database name.
db = client['gig']
platform_analytics_collection = db['Platform Analytics']

# Path to the CSV file
csv_file_path = "PlatformAnalytics.csv"  # Replace with your actual file path if different

# Load data from CSV and push to MongoDB
def push_csv_to_mongodb(csv_path, collection):
    """Load a platform-analytics CSV, coerce numeric columns, and bulk-insert it.

    All rows are read and converted first and then written with a single
    ``insert_many`` call, so a malformed value aborts before anything is
    inserted and the upload costs one round-trip instead of one per row.

    Args:
        csv_path: Path to a UTF-8 CSV file with a header row containing at
            least the eight analytics columns converted below.
        collection: Target collection object exposing ``insert_many``.

    Raises:
        KeyError: If a required column is missing from the CSV.
        ValueError: If a numeric column holds a value that cannot be parsed.
    """
    # Column -> converter for fields that must not be stored as strings.
    converters = {
        'record_id': int,
        'platform_id': int,
        'active_users': int,
        'completed_gigs': int,
        'cancellation_rate': float,
        'average_earnings_per_gig': float,
        'average_completion_time': int,
        'demand_fluctuation_score': float,
    }

    documents = []
    with open(csv_path, mode='r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            for column, convert in converters.items():
                row[column] = convert(row[column])
            documents.append(row)

    # insert_many rejects an empty document list, so skip the call when the
    # file contains only a header.
    if documents:
        collection.insert_many(documents)

    print(f"Data from '{csv_path}' has been successfully pushed to MongoDB.")

# Push data: load the file at csv_file_path ("PlatformAnalytics.csv") into the
# 'Platform Analytics' collection selected earlier in this cell.
push_csv_to_mongodb(csv_file_path, platform_analytics_collection)


Data from 'PlatformAnalytics.csv' has been successfully pushed to MongoDB.
