# The following code parses CSV, JSON, and XML files into the same format.

In [1]:
import csv
import json
import xml.etree.ElementTree as ET

def parse_csv(file_path):
    """Read a CSV file into a list of row dictionaries.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        list[dict]: One dictionary per data row, keyed by the header row.
        Values are strings, as produced by ``csv.DictReader``.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open(file_path, mode='r', encoding='utf-8', newline='') as file:
        return list(csv.DictReader(file))


def parse_json(file_path):
    """Load a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file (types are kept as
        they appear in the JSON, e.g. numbers stay numbers).
    """
    with open(file_path, mode='r', encoding='utf-8') as file:
        return json.load(file)


def parse_xml(file_path):
    """Read an XML file into a list of dictionaries, one per record element.

    Each direct child of the root element is treated as one record; the
    record's own children become ``{tag: text}`` entries.

    Args:
        file_path: Path of the XML file to read.

    Returns:
        list[dict]: One dictionary per child of the root element.
    """
    root = ET.parse(file_path).getroot()
    return [{elem.tag: elem.text for elem in child} for child in root]


if __name__ == "__main__":
    # The sample files live in the DPA/ folder; the parsers now honour the
    # path they are given, so the full relative path is passed explicitly.
    csv_data = parse_csv("DPA/data1.csv")
    print("CSV Data:", csv_data)

    json_data = parse_json("DPA/data.json")
    print("JSON Data:", json_data)

    xml_data = parse_xml("DPA/data.xml")
    print("XML Data:", xml_data)


CSV Data: [{'name': 'Alice', 'age': '30', 'city': 'New York'}, {'name': 'Bob', 'age': '25', 'city': 'Los Angeles'}]
JSON Data: [{'name': 'Charlie', 'age': 35, 'city': 'Chicago'}, {'name': 'David', 'age': 40, 'city': 'Houston'}]
XML Data: [{'name': 'Emma', 'age': '28', 'city': 'Miami'}, {'name': 'Frank', 'age': '33', 'city': 'Seattle'}]


# The following code reads an xlsx file. (Include the file extension when entering the filename.)

In [4]:
import pandas as pd

def extract_excel_data(file_path, sheet_name=0):
    """Load one sheet of an Excel workbook.

    Args:
        file_path: Path of the workbook, passed to ``pd.read_excel``.
        sheet_name: Sheet to load, forwarded unchanged to ``pd.read_excel``
            (defaults to ``0``).

    Returns:
        The value returned by ``pd.read_excel`` on success; otherwise the
        string ``"Error: <message>"`` describing the exception that occurred.
    """
    try:
        frame = pd.read_excel(file_path, sheet_name=sheet_name)
    except Exception as exc:
        # Best-effort contract: report the failure as text instead of raising.
        return f"Error: {exc!s}"
    else:
        return frame

if __name__ == "__main__":
    # Ask for the workbook location and, optionally, which sheet to read.
    file_path = input("Enter Excel file path: ")
    sheet_name = input("Enter sheet name (leave blank for first sheet): ")
    if not sheet_name:
        # Blank answer: fall back to 0, matching the function's default.
        sheet_name = 0
    data = extract_excel_data(file_path, sheet_name)
    # Heading and result on separate lines.
    print("Extracted Data:", data, sep="\n")


Enter Excel file path: DPA/facultydetails.xlsx
Enter sheet name (leave blank for first sheet): 
Extracted Data:
                 Name of Department Name of the Faculty Gender (Male/Female)  \
0                               NaN                 NaN                  NaN   
1  Computer Science and Engineering    Dr. Dinesh Gupta                 Male   
2  Computer Science and Engineering     Dr. Raman Kumar                 Male   
3  Computer Science and Engineering    Dr. Anshu Bhasin               Female   
4  Computer Science and Engineering           Dr. Pooja               Female   

  Academic Designation (Professor/Associate Professor/Assistant Professor/Vice Chancellor/Dean/Director/ Principal/Other/Associate Dean/Reader/lecturer  \
0                                                NaN                                                                                                      
1                                Assistant Professor                                             

# The following code normalizes the data using MinMaxScaler

In [5]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def normalize_data(file_path):
    """Min-max scale the numeric columns of a CSV file.

    Args:
        file_path: Path of the CSV file, loaded with ``pd.read_csv``.

    Returns:
        pandas.DataFrame in which every ``float64``/``int64`` column has been
        transformed by a default ``MinMaxScaler`` and all other columns are
        left as loaded. If the file has no such columns, the frame is
        returned unchanged. If anything fails (for example the file cannot
        be read), the string ``"Error: <message>"`` is returned instead of
        raising.
    """
    try:
        # Load data
        df = pd.read_csv(file_path)

        # Normalize numeric columns only
        numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

        # Nothing to scale: hand the data back as-is rather than passing a
        # zero-column frame to the scaler, which rejects it.
        if len(numeric_cols) == 0:
            return df

        # Initialize scaler
        scaler = MinMaxScaler()
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

        return df
    except Exception as e:
        return f"Error: {str(e)}"

if __name__ == "__main__":
    # Prompt for the CSV to normalize, then show the result under a heading.
    file_path = input("Enter CSV file path: ")
    normalized_df = normalize_data(file_path)
    print("Normalized Data:", normalized_df, sep="\n")

Enter CSV file path: DPA/salary.csv
Normalized Data:
      Name       Age    Salary     Score
0    Alice  0.000000  0.000000  0.500000
1      Bob  0.333333  0.333333  0.000000
2  Charlie  0.666667  0.666667  1.000000
3    David  1.000000  1.000000  0.714286
4      Eve  0.200000  0.166667  0.214286


# The following code aggregates the data 

In [6]:
import pandas as pd

def aggregate_data(file_path, columns=('Age', 'Salary', 'Score'), func='mean'):
    """Aggregate selected columns of a CSV file.

    Args:
        file_path: Path of the CSV file, loaded with ``pd.read_csv``.
        columns: Column name, or iterable of column names, to aggregate.
            Defaults to ``('Age', 'Salary', 'Score')``.
        func: Aggregation applied to every selected column, in any form
            accepted by ``DataFrame.agg`` (defaults to ``'mean'``).

    Returns:
        The result of ``DataFrame.agg`` for the selected columns on success;
        otherwise the string ``"Error: <message>"`` describing the exception
        (for example when the file or a requested column is missing).
    """
    try:
        # Load data
        df = pd.read_csv(file_path)

        # A bare string would otherwise be iterated character by character.
        if isinstance(columns, str):
            columns = [columns]

        # Perform aggregation: apply `func` to each requested column
        aggregated_df = df.agg({column: func for column in columns})

        return aggregated_df
    except Exception as e:
        return f"Error: {str(e)}"

if __name__ == "__main__":
    # Prompt for the CSV to summarize, then show the result under a heading.
    file_path = input("Enter CSV file path: ")
    aggregated_df = aggregate_data(file_path)
    print("Aggregated Data:", aggregated_df, sep="\n")


Enter CSV file path: DPA/salary.csv
Aggregated Data:
Age          31.6
Salary    63000.0
Score        84.8
dtype: float64


# The following code converts a JSON file (e.g. data.json) into a CSV file

In [7]:
import pandas as pd

def json_to_csv(json_file_path, csv_file_path):
    """Convert a JSON file to CSV and report the outcome on stdout.

    Args:
        json_file_path: Path of the JSON file, read with ``pd.read_json``.
        csv_file_path: Destination path for the CSV output.

    Returns:
        None. A success message is printed after writing; if any step raises,
        ``"Error: <message>"`` is printed instead.
    """
    try:
        # Read and write in one chain; index=False keeps the row index out
        # of the CSV.
        pd.read_json(json_file_path).to_csv(csv_file_path, index=False)
        print(f"JSON data successfully converted to CSV and saved at {csv_file_path}")
    except Exception as exc:
        print(f"Error: {exc!s}")

if __name__ == "__main__":
    # Collect source and destination paths (prompted in this order), then convert.
    json_file, csv_file = (
        input("Enter JSON file path: "),
        input("Enter CSV file path: "),
    )
    json_to_csv(json_file, csv_file)

Enter JSON file path: DPA/data.json
Enter CSV file path: DPA/data3.csv
JSON data successfully converted to CSV and saved at DPA/data3.csv


In [8]:
import pandas as pd

def json_to_csv_with_encoding(json_file_path, csv_file_path, columns=('city',)):
    """Convert a JSON file to CSV, one-hot encoding selected columns.

    Args:
        json_file_path: Path of the JSON file, read with ``pd.read_json``.
        csv_file_path: Destination path for the CSV output.
        columns: Column name, or iterable of column names, to one-hot encode
            with ``pd.get_dummies``. Defaults to ``('city',)``.

    Returns:
        None. A success message is printed after writing; if any step raises
        (for example a requested column is missing), ``"Error: <message>"``
        is printed instead.
    """
    try:
        # Load JSON data
        df = pd.read_json(json_file_path)

        # A bare string would otherwise be split into single characters.
        if isinstance(columns, str):
            columns = [columns]

        # Convert categorical data to numerical using one-hot encoding
        df = pd.get_dummies(df, columns=list(columns))

        # Convert to CSV
        df.to_csv(csv_file_path, index=False)
        print(f"JSON data with encoded categorical variables successfully converted to CSV and saved at {csv_file_path}")
    except Exception as e:
        print(f"Error: {str(e)}")

if __name__ == "__main__":
    # Collect source and destination paths (prompted in this order), then convert.
    json_file, csv_file = (
        input("Enter JSON file path: "),
        input("Enter CSV file path: "),
    )
    json_to_csv_with_encoding(json_file, csv_file)

Enter JSON file path: DPA/data.json
Enter CSV file path: DPA/nd.csv
JSON data with encoded categorical variables successfully converted to CSV and saved at DPA/nd.csv


In [1]:
pip install -U notebook-as-pdf


Collecting notebook-as-pdf
  Obtaining dependency information for notebook-as-pdf from https://files.pythonhosted.org/packages/be/aa/33c6dc40a09b01d77a657e95461932463e4c061ba623e6bbc4f6ab15634d/notebook_as_pdf-0.5.0-py3-none-any.whl.metadata
  Downloading notebook_as_pdf-0.5.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pyppeteer (from notebook-as-pdf)
  Obtaining dependency information for pyppeteer from https://files.pythonhosted.org/packages/3d/ee/fb2757a38025421fd3844a0ed0a230b78c9c04a66355024436cf3005a70c/pyppeteer-2.0.0-py3-none-any.whl.metadata
  Downloading pyppeteer-2.0.0-py3-none-any.whl.metadata (7.1 kB)
Collecting PyPDF2 (from notebook-as-pdf)
  Obtaining dependency information for PyPDF2 from https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl.metadata
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pyee<12.0.0,>=11.0.0 (from pyppeteer->notebook-as-pdf)
  Obtaini

In [2]:
pyppeteer-install

NameError: name 'pyppeteer' is not defined