In [1]:
%pip  install pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd

In [None]:
import pandas as pd

def extract_column_to_csv(csv_file_path, column_name, output_csv_path):
    """
    Extract a specific column from a CSV file and save it as a CSV file.
    
    Parameters:
    - csv_file_path: Path to the CSV file
    - column_name: The name of the column to extract
    - output_csv_path: Path to save the extracted column as a CSV file
    """
    try:
        # Read the CSV file
        sheet_data = pd.read_csv(csv_file_path)
        
        # Check if the column exists in the sheet
        if column_name in sheet_data.columns:
            # Extract the column and save it to CSV
            sheet_data[[column_name]].dropna().to_csv(output_csv_path, index=False)
            print(f"Column '{column_name}' has been successfully extracted to CSV.")
        else:
            print(f"Column '{column_name}' not found in the CSV file.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage with absolute path
csv_file_path = r'Mpcb_file/MPCB Electronic Waste.csv'  # Adjust with the correct path
column_name = 'Name & Address'  # Replace with the column name you want to extract
output_csv_path = 'output_file.csv'  # Path to save the output CSV file

extract_column_to_csv(csv_file_path, column_name, output_csv_path)


Column 'Company Name' has been successfully extracted to CSV.


In [13]:
import pandas as pd

def print_column_names_from_second_row(csv_file_path):
    """
    Print column names from the second row of a CSV file.
    
    Parameters:
    - csv_file_path: Path to the CSV file
    """
    try:
        # Read the CSV file and use the second row as the column names
        data = pd.read_csv(csv_file_path, header=1)
        
        # Print the column names
        print("Column names from the second row:")
        for column in data.columns:
            print(column)
    except Exception as e:
        print(f"Error: {e}")

# Example usage with absolute path
csv_file_path = r'Mpcb_file/MPCB Electronic Waste.csv'  # Adjust with the correct path

print_column_names_from_second_row(csv_file_path)


Column names from the second row:
Sr. No.
Name and Address of Industry
Regional Office
Type
Capacity Allotted as per
 Consent / Authorisation (MT/A)
Consent / Authorization No. & Issue Date
Validity of
 Consent / Authorization
E-mail Address
Contact Number
Current Status


In [14]:
import pandas as pd

def extract_column_to_csv_from_second_row(csv_file_path, column_name, new_column_name, output_csv_path):
    """
    Extract a specific column from the second row of a CSV file, rename it, and save it as a CSV file.
    
    Parameters:
    - csv_file_path: Path to the CSV file
    - column_name: The name of the column to extract
    - new_column_name: The new name for the extracted column
    - output_csv_path: Path to save the extracted column as a CSV file
    """
    try:
        # Read the CSV file and use the second row as column names
        sheet_data = pd.read_csv(csv_file_path, header=1)
        
        # Check if the column exists in the sheet
        if column_name in sheet_data.columns:
            # Extract the column, rename it, and save it to CSV
            extracted_column = sheet_data[[column_name]].dropna()
            extracted_column.columns = [new_column_name]
            extracted_column.to_csv(output_csv_path, index=False)
            print(f"Column '{column_name}' from the second row has been successfully extracted and renamed to '{new_column_name}' in the CSV file.")
        else:
            print(f"Column '{column_name}' not found in the CSV file.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage with absolute path
csv_file_path = r'Mpcb_file/MPCB Electronic Waste.csv'  # Adjust with the correct path
column_name = 'Name and Address of Industry'  # Replace with the column name you want to extract
new_column_name = 'Company Name'  # New name for the extracted column
output_csv_path = 'output_file.csv'  # Path to save the output CSV file

extract_column_to_csv_from_second_row(csv_file_path, column_name, new_column_name, output_csv_path)


Column 'Name and Address of Industry' from the second row has been successfully extracted and renamed to 'Company Name' in the CSV file.


In [18]:
import re
import csv

# File paths
input_file = "output_file.csv"  # Replace with your input file name
output_file = "company_names.csv"  # Output file name

# Read the input CSV file
with open(input_file, "r", newline='', encoding="utf-8") as infile:
    reader = csv.reader(infile)
    data = [row[0] for row in reader]  # Assuming company details are in the first column

# Regular expression to extract company names
company_names = [re.match(r"^[^,]+", row).group(0) for row in data if re.match(r"^[^,]+", row)]

# Save the cleaned company names into a new CSV file
with open(output_file, "w", newline='', encoding="utf-8") as outfile:
    writer = csv.writer(outfile)
   # writer.writerow(["Company Name"])  # Header row
    for name in company_names:
        writer.writerow([name])

print(f"Cleaned company names saved to {output_file}")


Cleaned company names saved to company_names.csv


In [None]:

#mergeing two csv files
import os
import pandas as pd

def merge_csv_files(input_folder, output_file, column_name='Company Name'):
    """
    Merge all CSV files from a specified folder into a single CSV file with one column named 'Name'.
    
    Parameters:
    - input_folder: Path to the folder containing the CSV files
    - output_file: Path to save the merged CSV file
    - column_name: The name of the column to extract (default is 'Name')
    """
    try:
        # List to hold all the company names under 'Name' column
        all_data = []

        # Loop through all files in the specified folder
        for file in os.listdir(input_folder):
            if file.endswith('.csv'):
                file_path = os.path.join(input_folder, file)
                # Read each CSV file
                data = pd.read_csv(file_path)
                
                # Ensure the 'column_name' exists in the CSV, then extract the data
                if column_name in data.columns:
                    all_data.extend(data[column_name].dropna().tolist())  # Add the data to the list

        # Create a DataFrame from the list of all data
        merged_data = pd.DataFrame({column_name: all_data})

        # Save the merged DataFrame to a new CSV file
        merged_data.to_csv(output_file, index=False)
        print(f"Merged CSV file saved at: {output_file}")

    except Exception as e:
        print(f"Error: {e}")

# Example usage
input_folder = r'cpcb_companies'  # Replace with the folder containing your CSV files
output_file = 'cpcb_companynames.csv'  # Path to save the merged CSV file

merge_csv_files(input_folder, output_file)

Merged CSV file saved at: output_file.csv


In [27]:
import pandas as pd

def compare_csv_files(file1_path, file2_path, column_name='Company Name', output_file='comparison_result.csv'):
    """
    Compare two CSV files and print names that are in one file but not in the other.
    
    Parameters:
    - file1_path: Path to the first CSV file
    - file2_path: Path to the second CSV file
    - column_name: The name of the column to compare (default is 'Company Name')
    """
    
    try:
        # Read the CSV files into DataFrames
        df1 = pd.read_csv(file1_path)
        df2 = pd.read_csv(file2_path)

        # Check if the column exists in both files
        if column_name not in df1.columns or column_name not in df2.columns:
            print(f"Column '{column_name}' not found in one of the files.")
            return
        
        # Extract the 'Company Name' column and drop any NaN values
        names_file1 = set(df1[column_name].dropna().str.strip().str.lower())  # Normalize to lower case
        names_file2 = set(df2[column_name].dropna().str.strip().str.lower())  # Normalize to lower case

        # Compare and find names that are in file1 but not in file2
        unique_to_file1 = names_file1 - names_file2
        unique_to_file2 = names_file2 - names_file1
        
        # Print the results
        if unique_to_file1:
            print(f"Names in {file1_path} but not in {file2_path}:")
            print("\n".join(unique_to_file1))
        else:
            print(f"No names found in {file1_path} but not in {file2_path}.")

        if unique_to_file2:
            print(f"\nNames in {file2_path} but not in {file1_path}:")
            print("\n".join(unique_to_file2))
        else:
            print(f"No names found in {file2_path} but not in {file1_path}.")

        # Prepare the result DataFrame
        result_data = {
            'Company Names in Mpcb but not in cpcb': list(unique_to_file1)
        }

        result_df = pd.DataFrame(result_data)

        # Save the results into a CSV file
        result_df.to_csv(output_file, index=False)
        print(f"Comparison results saved to {output_file}")

    except Exception as e:
        print(f"Error: {e}")

# Example usage
file1_path = r'mpcb_companies/mpcbcompany_names.csv'  # Path to the first CSV file
file2_path = r'cpcb_companynames.csv'  # Path to the second CSV file
output_file = 'comparison_result.csv'  # Output file path

compare_csv_files(file1_path, file2_path)


Names in mpcb_companies/mpcbcompany_names.csv but not in cpcb_companynames.csv:
hari om scrap traders
e clean e green recycling
earth sense recycle pvt. ltd.
kapila enterprises
rolex entrprises
national sales corporation
e-recon recycling pvt. ltd.
ecolayer e-waste recycling
connect info solutions india private limited
sahara traders
green valley e-waste management pvt. ltd.
mukesh metal
mercury metal industries (unit iii)
ancus india reprocessing pvt. ltd.
rcube recycling pvt ltd.
alfa trading co.
aman trading co. f/1
perfect e-waste recyclers
biyani polymers
imagine marketing limited
bhangarwala waste management pvt ltd.
sultan disposal stores
comnet e-waste llp
green enviro management
 solutions llp
process recycling
recycling future
jupiter enterprises
bombay scrap traders
d trading company
pune green electronic waste recycler pvt. ltd.
green it recycling centre pvt. ltd.
green valley e waste management pvt ltd
grade infratech pvt. ltd.
anand computer system
e cycle & company
surit