**Generate a Course at Random First**:
   This step writes `csv/Courses.csv`: a single `Course` column with one row per record (100,000 when run as a script), where each value is picked at random from a fixed list of courses (Java, Python, PHP, C#).

In [18]:
import random
import csv

def generate_fake_courses(num_records):
    """Write ``num_records`` randomly chosen course names to ``csv/Courses.csv``.

    The file has a single ``Course`` column: one header row followed by
    ``num_records`` data rows, each picked with ``random.choice`` from a
    fixed list of four courses. The ``csv`` output directory is created
    when it does not exist yet.

    Args:
        num_records: Number of data rows to write (the header is extra).
    """
    import os  # local import so this cell stays self-contained

    courses = ['Java', 'Python', 'PHP', 'C#']  # List of available courses
    output_path = 'csv/Courses.csv'

    # open(..., 'w') does not create parent directories; without this the
    # first run in a fresh checkout fails with FileNotFoundError.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Explicit UTF-8 so the file does not depend on the platform's default
    # encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Course'])
        writer.writeheader()
        # One random.choice call per row, in row order.
        writer.writerows(
            {'Course': random.choice(courses)} for _ in range(num_records)
        )

# Entry point for this step: runs only when the cell/file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    num_records = 100000  # number of course rows to generate
    generate_fake_courses(num_records)


**Generate Faker data, merge it with the courses, and split into three outputs**:
   This step uses the Faker library to generate fake personal data (name, contact number, country, country code), merges it column-wise with the course data generated in the first step, and then randomly distributes the merged rows over three output files (`csv/splitoutput1.csv`, `csv/splitoutput2.csv`, `csv/splitoutput3.csv`).

In [19]:
from faker import Faker
import csv
import random
import pandas as pd

# Module-level Faker instance; generate_fake_data() draws every field from it.
fake = Faker()

def generate_fake_data(num_records):
    """Write ``num_records`` rows of Faker-generated data to ``csv/fake_data.csv``.

    Each row has the columns ``Name``, ``Contact``, ``Country`` and
    ``CountryCode``, filled from the module-level ``fake`` instance. A header
    row is written first.

    Args:
        num_records: Number of data rows to write (the header is extra).
    """
    fieldnames = ['Name', 'Contact', 'Country', 'CountryCode']

    # Explicit UTF-8: generated names/countries may contain non-ASCII
    # characters, and the file is read back with pandas.read_csv, which
    # decodes as UTF-8 by default. The platform default encoding is not
    # guaranteed to match.
    with open('csv/fake_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for _ in range(num_records):
            # NOTE: country() and country_code() are separate calls, so the
            # code in a row is not tied to the country name in that row.
            writer.writerow({
                'Name': fake.name(),
                'Contact': fake.phone_number(),
                'Country': fake.country(),
                'CountryCode': fake.country_code(),
            })


def merge_csv(df1, df2):
    """Put the columns of ``df1`` and ``df2`` side by side and save them as CSV.

    The two frames are concatenated column-wise and the result is written,
    without the index column, to ``csv/merged_Courses_output.csv``.

    Args:
        df1: Frame whose columns come first in the output.
        df2: Frame whose columns are appended after those of ``df1``.
    """
    destination = "csv/merged_Courses_output.csv"
    side_by_side = pd.concat([df1, df2], axis="columns")
    side_by_side.to_csv(destination, index=False)



def split_csv(input_file):
    """Randomly distribute the rows of ``input_file`` over three CSV files.

    Every data row of ``input_file`` is written to exactly one of
    ``csv/splitoutput1.csv``, ``csv/splitoutput2.csv`` or
    ``csv/splitoutput3.csv``, chosen with ``random.choice``. Each output
    starts with the same header, with ``Country`` as the first column.

    Args:
        input_file: Path of a CSV file whose header consists of the columns
            ``Country``, ``Name``, ``Contact``, ``CountryCode`` and ``Course``
            (in any order).

    Raises:
        ValueError: If a row contains a column that is not one of the five
            output field names (raised by ``csv.DictWriter``).
    """
    from contextlib import ExitStack  # local import keeps the cell self-contained

    output_files = ['csv/splitoutput1.csv', 'csv/splitoutput2.csv', 'csv/splitoutput3.csv']
    fieldnames = ['Country', 'Name', 'Contact', 'CountryCode', 'Course']

    # ExitStack closes (and therefore flushes) every file on exit, including
    # when an error occurs part-way through. Previously the output handles
    # were opened inline and never closed.
    with ExitStack() as stack:
        writers = {}
        for file_name in output_files:
            handle = stack.enter_context(
                open(file_name, 'w', newline='', encoding='utf-8')
            )
            writers[file_name] = csv.DictWriter(handle, fieldnames=fieldnames)
            writers[file_name].writeheader()

        # newline='' is what the csv module expects for its input files;
        # UTF-8 matches what pandas.to_csv writes by default.
        source = stack.enter_context(
            open(input_file, 'r', newline='', encoding='utf-8')
        )
        for row in csv.DictReader(source):
            writers[random.choice(output_files)].writerow(row)



# Entry point for this step: generate fake people, merge them with the
# courses file, then split the merged file into three parts.
if __name__ == "__main__":
    num_records = 100000  # number of fake person rows to generate
    generate_fake_data(num_records)

    # Read back the file just written plus the courses file, which this
    # cell does not create and therefore must already exist.
    df1 = pd.read_csv("csv/fake_data.csv")
    df2 = pd.read_csv("csv/Courses.csv")
    merge_csv(df1,df2)

    # merge_csv() wrote this path; feed it to the splitter.
    input_file = 'csv/merged_Courses_output.csv'
    split_csv(input_file)
    print("Fake data generated and CSV file split completed.")


Fake data generated and CSV file split completed.


**Run the NRB scrape and generate forex.csv**:
   This step scrapes the foreign exchange page of NRB (Nepal Rastra Bank, `https://www.nrb.org.np/forex/`) and writes the result to `csv/forex.csv`. The file has one row per currency with its currency code, currency name, units, and buy and sell rates.

In [20]:
import urllib.request
import re

import csv

# Browser-like User-Agent header sent with the requests in this notebook
# (presumably because the sites reject urllib's default one; not verified).
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'

url ='https://www.nrb.org.np/forex/'
request = urllib.request.Request(url, headers={'User-Agent': user_agent})
# Use the response as a context manager so the connection is always closed,
# even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    html_content = response.read().decode('utf-8')

# Currency cell: a flag <div> (any three characters after "flag--"), then
# the upper-cased text (group 1, used as the currency code) followed by a
# <span> holding the text in parentheses (group 2, used as the currency name).
currency_pattern = r'<div class="flag flag--..."></div>\s*<div class="ml-2 text-uppercase">(.*?)\s*<span\s*class="text-capitalize">\(([^)]+)\)</span>'

# Find all matches in the HTML
currency_matches = re.findall(currency_pattern, html_content)

currency_code = [item[0].upper() for item in currency_matches]
currency = [item[1] for item in currency_matches]

# Three consecutive single-line <td> cells ('.' does not match newlines),
# taken as units, buy rate and sell rate.
data_pattern = r'<td>(.*?)</td>\s*<td>(.*?)</td>\s*<td>(.*?)</td>'
# Find all matches in the HTML
data_matches = re.findall(data_pattern, html_content)

units = [item[0] for item in data_matches]
buy = [item[1] for item in data_matches]
sell = [item[2] for item in data_matches]

# Build one (code, name, units, buy, sell) tuple per currency. The lists are
# paired purely by position and zip() stops at the shortest one, so a count
# mismatch between the two regexes would silently drop or misalign rows.
data = list(zip(currency_code, currency, units, buy, sell))

# Define the CSV file path
csv_file = 'csv/forex.csv'

# Write the data to a CSV file
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Currency_Code','Currency','Units','Buy','Sell'])  # Write header row
    writer.writerows(data)  # Write data rows

print(f'Data exported to {csv_file}')

Data exported to csv/forex.csv


**Generate Country_and_CurrencyCode.csv**:
   This step scrapes the currency-code table at `https://www.iban.com/currency-codes` and writes `csv/Country_and_CurrencyCode.csv`, which lists the currency code, currency name, and country for each table row.

In [21]:
import csv

# NOTE: this cell reuses `urllib.request`, `re` and `user_agent` from the
# NRB scrape cell, so that cell must have been run first.
url ='https://www.iban.com/currency-codes'
request = urllib.request.Request(url, headers={'User-Agent': user_agent})
# Use the response as a context manager so the connection is always closed,
# even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    html_content = response.read().decode('utf-8')

# Four consecutive single-line <td> cells ('.' does not match newlines),
# taken as country, currency name, currency code and a fourth value.
data_pattern = r'<td>(.*?)</td>\s*<td>(.*?)</td>\s*<td>(.*?)</td>\s*<td>(.*?)</td>'
# Find all matches in the HTML
data_matches = re.findall(data_pattern, html_content)

Country = [item[0] for item in data_matches]
Currency = [item[1] for item in data_matches]
Currency_Code = [item[2] for item in data_matches]
numbers = [item[3] for item in data_matches]  # captured but not exported

# Build one (currency code, currency name, country) tuple per table row;
# the column order matches the header written below.
data = list(zip(Currency_Code, Currency, Country))

# Define the CSV file path
csv_file = 'csv/Country_and_CurrencyCode.csv'

# Write the data to a CSV file
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Currency_Code','Currency','Country'])  # Write header row
    writer.writerows(data)  # Write data rows

print(f'Data exported to {csv_file}')

Data exported to csv/Country_and_CurrencyCode.csv


**Merge Forex_Country_Merged.py**:
   This step involves running a Python script (`Forex_Country_Merged.py`) to merge `forex.csv` and `Country_and_CurrencyCode.csv` based on currency codes. The output of this merge is saved to a file named `CountryAdd_onCurrency_output.csv`.

In [22]:
import pandas as pd

def merge_csv_complex(df3, df4, output_file):
    """Join ``df4`` with ``df3`` on ``Currency_Code`` and save the result as CSV.

    ``df4`` is the left-hand side of the merge, so its columns come first in
    the output. The join uses pandas' default merge behaviour, and the
    result is written without the index column.

    Args:
        df3: Right-hand frame; must contain a ``Currency_Code`` column.
        df4: Left-hand frame; must contain a ``Currency_Code`` column.
        output_file: Path of the CSV file to write.
    """
    joined = df4.merge(df3, on=['Currency_Code'])
    joined.to_csv(output_file, index=False)

# Entry point for this step: join the forex rates with the country/currency
# table and write the combined file.
if __name__ == "__main__":
    df3 = pd.read_csv("csv/forex.csv")
    df4 = pd.read_csv("csv/Country_and_CurrencyCode.csv")
    output_file = 'csv/CountryAdd_onCurrency_output.csv'
    # df4 (country table) is the left side of the merge inside the helper.
    merge_csv_complex(df3, df4, output_file)


**Merge_all.py**:
   This step involves running another Python script to perform multiple operations:
   - Reorder the columns in the data to bring the "Country" column first.
   - Convert the "Country" column to lowercase to ensure case-insensitive matching.
   - Merge the reordered and lowercased data with other datasets.
   - Export the final merged data.

In [25]:
import pandas as pd

def merge_csv_complex(out1, out2, out3, mergedcsv1, output_file1, output_file2, output_file3):
    """Join each split file with the country/currency table and save the results.

    Each of ``out1``..``out3`` is merged with ``mergedcsv1`` on the
    ``Country`` column, compared case-insensitively by lower-casing both
    sides, and written without the index to the matching ``output_file``.
    The ``Country`` values in the output files are lower-case.

    The input frames are not modified: lower-casing is done on copies.

    NOTE: this function has the same name as the three-argument helper of
    the previous step; defining it rebinds ``merge_csv_complex``.

    Args:
        out1, out2, out3: Frames with a ``Country`` column (the split files).
        mergedcsv1: Country/currency frame with a ``Country`` column.
        output_file1, output_file2, output_file3: Destination CSV paths for
            the merges of ``out1``, ``out2`` and ``out3`` respectively.
    """
    def _with_lower_country(frame):
        # assign() returns a new frame, so the caller's data stays untouched.
        return frame.assign(Country=frame['Country'].str.lower())

    reference = _with_lower_country(mergedcsv1)

    # Perform all merges first, then write, so nothing is written if any
    # merge fails.
    merged_frames = [
        pd.merge(_with_lower_country(split_df), reference, on=['Country'])
        for split_df in (out1, out2, out3)
    ]

    # Write the merged DataFrames to CSV files
    for merged_df, output_file in zip(
        merged_frames, (output_file1, output_file2, output_file3)
    ):
        merged_df.to_csv(output_file, index=False)

# Entry point for the final step: reorder the country/currency table, then
# join it with each of the three split files.
if __name__ == "__main__":
    mergedcsv = pd.read_csv("csv/CountryAdd_onCurrency_output.csv")
    # Reorder columns so "Country" comes first. Currency_x / Currency_y are
    # the suffixed names pandas gave the two "Currency" columns when the
    # forex and country tables were merged.
    df = mergedcsv[['Country', 'Currency_Code', 'Currency_x', 'Currency_y', 'Units', 'Buy', 'Sell']]
    # Write the reordered table to its own CSV file
    df.to_csv('csv/CountryRearrange.csv', index=False)

    # The three files produced by the split step
    out1 = pd.read_csv("csv/splitoutput1.csv")
    out2 = pd.read_csv("csv/splitoutput2.csv")
    out3 = pd.read_csv("csv/splitoutput3.csv")


    output_file1 = 'csv/finaldata1.csv'
    output_file2 = 'csv/finaldata2.csv'
    output_file3 = 'csv/finaldata3.csv'
    # Read back the reordered table that was written above
    mergedcsv1 = pd.read_csv('csv/CountryRearrange.csv')


    merge_csv_complex(out1, out2, out3, mergedcsv1, output_file1, output_file2, output_file3)

# NOTE: this print is outside the __main__ guard, so it runs even when the
# code above is skipped (e.g. on import).
print("Final Output Files Created")



Final Output Files Created
