In [30]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Function to generate random training schedule data
def generate_training_schedule(num_entries):
    """Build a DataFrame of randomly generated training sessions.

    Each row gets a topic picked from a fixed catalogue, a date drawn by the
    module-level ``random_date`` helper between 2021-01-01 and the moment this
    function is called, a start hour in 9..16 and an end hour strictly after
    the start hour and no later than 18.

    Args:
        num_entries: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns 'Training Topic', 'Training Date',
        'Training Start Time' and 'Training End Time'.
    """
    topics = [
        "Software Development", "Data Science", "Cybersecurity", "Cloud Computing", "Web Development",
        "Machine Learning", "Deep Learning", "Natural Language Processing", "Artificial Intelligence",
        "Network Administration", "Database Management", "DevOps", "Mobile App Development", "Internet of Things (IoT)",
        "Big Data Analytics", "Blockchain Technology", "Virtualization", "System Administration",
        "IT Infrastructure Management", "Information Security", "Computer Forensics", "Penetration Testing",
        "Ethical Hacking", "Digital Marketing Analytics", "UX/UI Design", "Game Development", "Embedded Systems",
        "Robotics", "Computer Vision", "Natural Language Processing (NLP)", "Business Intelligence", "Cloud Security",
        "Data Engineering", "Quantum Computing", "Augmented Reality (AR)", "Virtual Reality (VR)", "Bioinformatics",
        "Geographical Information Systems (GIS)", "Renewable Energy Systems", "Industrial Automation",
        "Wireless Communication", "5G Technology", "Soft Skill Training", "Semiconductor Manufacturing",
        "Computational Biology", "Nanotechnology", "Human-Computer Interaction", "Cognitive Computing",
        "Predictive Maintenance", "Supply Chain Management Systems", "Enterprise Resource Planning (ERP)",
        "Quality Assurance and Testing", "Technical Writing and Documentation"
    ]

    # Define date range from 2021 to today
    start_date_range = datetime(2021, 1, 1)
    end_date_range = datetime.now()

    training_data = []
    for _ in range(num_entries):
        topic = random.choice(topics)
        start_date = random_date(start_date_range, end_date_range)
        # The schedule has a single date column, so no end date is drawn:
        # the previously computed end date was never stored or returned.
        start_time = random.randint(9, 16)
        # Lower bound start_time + 1 keeps the end hour strictly after the start hour
        end_time = random.randint(start_time + 1, 18)
        training_data.append([topic, start_date, start_time, end_time])

    return pd.DataFrame(training_data, columns=['Training Topic', 'Training Date', 'Training Start Time', 'Training End Time'])

# Function to generate random assessment schedule data
def generate_assessment_schedule(training_schedule):
    """Create one randomly scheduled assessment for every training row.

    For each row of ``training_schedule`` the test date is drawn by the
    module-level ``random_date`` helper, called with the training date plus
    one day and the current time. The start hour is drawn from 9..15, the end
    hour from start+1..18 and the total score from 25, 50, 100 or 120.

    Args:
        training_schedule: DataFrame providing 'Training Topic' and
            'Training Date' columns.

    Returns:
        pandas.DataFrame with columns 'Assessment ID' (the index label of the
        training row), 'Assessment Domain', 'Assessment Sub-Topic',
        'Date of Test', 'Start Time of Test', 'End Time of Test' and
        'Total Score'.
    """
    records = []
    for assessment_id, training_row in training_schedule.iterrows():
        domain = training_row['Training Topic']
        # Earliest candidate is the day after the training session
        earliest_test_day = training_row['Training Date'] + timedelta(days=1)
        test_date = random_date(earliest_test_day, datetime.now())
        opens_at = random.randint(9, 15)
        closes_at = random.randint(opens_at + 1, 18)
        max_score = random.choice([25, 50, 100, 120])
        records.append([
            assessment_id,
            domain,
            'Assessment on ' + domain,
            test_date,
            opens_at,
            closes_at,
            max_score,
        ])

    output_columns = [
        'Assessment ID', 'Assessment Domain', 'Assessment Sub-Topic',
        'Date of Test', 'Start Time of Test', 'End Time of Test', 'Total Score',
    ]
    return pd.DataFrame(records, columns=output_columns)

# # Function to generate random scores data
# def generate_scores(assessment_schedule):
#     scores_data = []
#     for index, row in assessment_schedule.iterrows():
#         obtained_score = random.randint(0, row['Total Score'])
#         scores_data.append([row['Assessment ID'], 'User' + str(index), 'user' + str(index) + '@example.com', obtained_score])
    
#     return pd.DataFrame(scores_data, columns=['Assessment ID', 'User Name', 'Mail ID', 'Obtained Score'])

# Function to generate scores data for existing users
def generate_scores(assessment_schedule, users):
    """Assign each assessment to a randomly chosen existing user with a random score.

    Args:
        assessment_schedule: DataFrame providing 'Assessment ID' and
            'Total Score' columns.
        users: Sequence of user records; element 0 is used as the user name
            and element 1 as the mail id.

    Returns:
        pandas.DataFrame with columns 'Assessment ID', 'User Name', 'Mail ID'
        and 'Obtained Score', one row per assessment. The obtained score is
        drawn from 0 up to and including the assessment's total score.
    """
    def _score_entry(assessment):
        # Draw order is kept as user first, then score
        chosen = random.choice(users)
        points = random.randint(0, assessment['Total Score'])
        return [assessment['Assessment ID'], chosen[0], chosen[1], points]

    entries = [_score_entry(row) for _, row in assessment_schedule.iterrows()]
    return pd.DataFrame(entries, columns=['Assessment ID', 'User Name', 'Mail ID', 'Obtained Score'])

# Example list of existing users with (username, mail id) tuples
# 150 synthetic accounts, 'User1' through 'User150', each paired with
# the matching lowercase 'userN@example.com' address
existing_users = [
    ('User' + str(number), 'user' + str(number) + '@example.com')
    for number in range(1, 151)
]


# Function to generate a random date between start_date and end_date
def random_date(start_date, end_date):
    """Return a random date a whole number of days after the earlier bound.

    The two bounds may be passed in either order; when ``start_date`` is later
    than ``end_date`` they are swapped. The result is the earlier bound plus a
    random number of whole days between 0 and the number of full days
    separating the bounds (inclusive).

    Args:
        start_date: One bound of the range.
        end_date: The other bound of the range.

    Returns:
        The earlier bound shifted forward by the drawn number of days.
    """
    if start_date > end_date:
        earliest, latest = end_date, start_date
    else:
        earliest, latest = start_date, end_date
    # .days drops any partial day, so the offset never overshoots the later bound
    whole_days = (latest - earliest).days
    return earliest + timedelta(days=random.randint(0, whole_days))

# Generate a training schedule with 250 rows (one row per requested entry)
training_schedule = generate_training_schedule(250)

# Generate an assessment schedule with 500 rows. It is derived from a separate,
# freshly generated 500-row training schedule, not from `training_schedule` above.
assessment_schedule = generate_assessment_schedule(generate_training_schedule(500))

# Generate 10,000 score rows from yet another freshly generated
# training -> assessment chain, assigning each to a user from `existing_users`.
# NOTE(review): because the three tables come from independent chains, the
# Assessment IDs in `scores` do not refer to rows of `assessment_schedule`
# -- confirm this is intended.
scores = generate_scores(generate_assessment_schedule(generate_training_schedule(10000)), existing_users)

# Save dataframes to CSV files (written to the current working directory, without the index column)
training_schedule.to_csv('training_schedule.csv', index=False)
assessment_schedule.to_csv('assessment_schedule.csv', index=False)
scores.to_csv('scores.csv', index=False)

print("CSV files generated successfully!")


CSV files generated successfully!


In [15]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

In [16]:
# Function to generate random training schedule data
def generate_training_schedule(num_entries):
    """Build a DataFrame of randomly generated upcoming training sessions.

    Each row gets a topic picked from a fixed catalogue, a date picked from
    30 consecutive daily timestamps starting at the moment this function is
    called, a start hour in 9..16 and an end hour strictly after the start
    hour and no later than 18.

    Args:
        num_entries: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns 'Training Topic', 'Training Date',
        'Training Start Time' and 'Training End Time'.
    """
    topics = [
        "Software Development",
        "Data Science",
        "Cybersecurity",
        "Cloud Computing",
        "Web Development",
        "Machine Learning",
        "Deep Learning",
        "Natural Language Processing",
        "Artificial Intelligence",
        "Network Administration",
        "Database Management",
        "DevOps",
        "Mobile App Development",
        "Internet of Things (IoT)",
        "Big Data Analytics",
        "Blockchain Technology",
        "Virtualization",
        "System Administration",
        "IT Infrastructure Management",
        "Information Security",
        "Computer Forensics",
        "Penetration Testing",
        "Ethical Hacking",
        "Digital Marketing Analytics",
        "UX/UI Design",
        "Game Development",
        "Embedded Systems",
        "Robotics",
        "Computer Vision",
        "Natural Language Processing (NLP)",
        "Business Intelligence",
        "Cloud Security",
        "Data Engineering",
        "Quantum Computing",
        "Augmented Reality (AR)",
        "Virtual Reality (VR)",
        "Bioinformatics",
        "Geographical Information Systems (GIS)",
        "Renewable Energy Systems",
        "Industrial Automation",
        "Wireless Communication",
        "5G Technology",
        "Soft Skill Training",
        "Semiconductor Manufacturing",
        "Computational Biology",
        "Nanotechnology",
        "Human-Computer Interaction",
        "Cognitive Computing",
        "Predictive Maintenance",
        "Supply Chain Management Systems",
        "Enterprise Resource Planning (ERP)",
        "Quality Assurance and Testing",
        "Technical Writing and Documentation"
    ]
    # Candidate dates: 30 daily timestamps beginning now. The schedule has a
    # single date column, so no separate pool of end dates is built or sampled
    # (the previous end-date draw was never stored or returned).
    start_dates = pd.date_range(start=datetime.now(), periods=30).tolist()

    training_data = []
    for _ in range(num_entries):
        topic = random.choice(topics)
        start_date = random.choice(start_dates)
        start_time = random.randint(9, 16)
        # Lower bound start_time + 1 keeps the end hour strictly after the start hour
        end_time = random.randint(start_time + 1, 18)
        training_data.append([topic, start_date, start_time, end_time])

    return pd.DataFrame(training_data, columns=['Training Topic', 'Training Date', 'Training Start Time', 'Training End Time'])


In [17]:

# Function to generate random assessment schedule data
def generate_assessment_schedule(training_schedule):
    """Create one randomly scheduled assessment for every training row.

    The test date is the training date plus 1 to 7 days; the start hour is
    drawn from 9..15, the end hour from start+1..18 and the total score from
    25, 50, 100 or 120.

    Args:
        training_schedule: DataFrame providing 'Training Topic' and
            'Training Date' columns.

    Returns:
        pandas.DataFrame with columns 'Assessment ID' (the index label of the
        training row), 'Assessment Domain', 'Assessment Sub-Topic',
        'Date of Test', 'Start Time of Test', 'End Time of Test' and
        'Total Score'.
    """
    score_options = [25, 50, 100, 120]

    def _build_record(assessment_id, training_row):
        # Draw order is kept as: day offset, start hour, end hour, total score
        domain = training_row['Training Topic']
        test_date = training_row['Training Date'] + timedelta(days=random.randint(1, 7))
        opens_at = random.randint(9, 15)
        closes_at = random.randint(opens_at + 1, 18)
        max_score = random.choice(score_options)
        return [assessment_id, domain, 'Assessment on ' + domain, test_date, opens_at, closes_at, max_score]

    records = [_build_record(label, row) for label, row in training_schedule.iterrows()]
    output_columns = [
        'Assessment ID', 'Assessment Domain', 'Assessment Sub-Topic',
        'Date of Test', 'Start Time of Test', 'End Time of Test', 'Total Score',
    ]
    return pd.DataFrame(records, columns=output_columns)


In [18]:

# Function to generate random scores data
def generate_scores(assessment_schedule):
    """Produce one random score row per assessment for a synthetic user.

    The user name and mail id are derived from the index label of each
    assessment row ('User<label>' and 'user<label>@example.com'). The obtained
    score is drawn from 0 up to and including the assessment's total score.

    Args:
        assessment_schedule: DataFrame providing 'Assessment ID' and
            'Total Score' columns.

    Returns:
        pandas.DataFrame with columns 'Assessment ID', 'User Name', 'Mail ID'
        and 'Obtained Score'.
    """
    rows = []
    for label, assessment in assessment_schedule.iterrows():
        points = random.randint(0, assessment['Total Score'])
        suffix = str(label)
        rows.append([
            assessment['Assessment ID'],
            'User' + suffix,
            'user' + suffix + '@example.com',
            points,
        ])

    return pd.DataFrame(rows, columns=['Assessment ID', 'User Name', 'Mail ID', 'Obtained Score'])


In [19]:


# Generate a small training schedule (10 entries)
training_schedule = generate_training_schedule(10)

# Generate the assessment schedule from that training schedule (one assessment per training row)
assessment_schedule = generate_assessment_schedule(training_schedule)

# Generate scores (one row per assessment) using the single-argument generate_scores
scores = generate_scores(assessment_schedule)


In [20]:

# Save dataframes to Excel sheets: one workbook, one sheet per table, index column omitted.
# NOTE: the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'openpyxl'", so that package must be
# installed in the kernel's environment before this cell can succeed.
with pd.ExcelWriter('machine_learning_data.xlsx') as writer:
    training_schedule.to_excel(writer, sheet_name='Training Schedule', index=False)
    assessment_schedule.to_excel(writer, sheet_name='Assessment Schedule', index=False)
    scores.to_excel(writer, sheet_name='Scores', index=False)

# Only reached when the workbook was written without raising
print("Excel sheets generated successfully!")


ModuleNotFoundError: No module named 'openpyxl'