In [2]:
import pandas as pd

In [3]:
def read_csv_to_dataframe(file_name, encoding='ISO-8859-1'):
    """
    Reads a CSV file and creates a pandas DataFrame from it.

    Failures are reported on stdout instead of being raised, so callers must
    check the result for None before using it.

    Args:
        file_name (str): Path to the CSV file.
        encoding (str): Text encoding used to decode the file. Defaults to
            'ISO-8859-1', which maps every possible byte to a character and
            therefore never fails to decode -- but it will silently garble
            non-ASCII text stored in another encoding. Pass the file's real
            encoding (e.g. 'utf-8') when such characters matter.

    Returns:
        pd.DataFrame or None: The parsed data, or None when the file is
        missing, empty, malformed, or cannot be decoded with `encoding`.
    """
    try:
        df = pd.read_csv(file_name, encoding=encoding)
    except FileNotFoundError:
        print(f"Error: File {file_name} not found.")
    except pd.errors.EmptyDataError:
        print(f"Error: File {file_name} is empty.")
    except pd.errors.ParserError:
        print(f"Error: File {file_name} contains parsing errors.")
    except (UnicodeDecodeError, LookupError) as e:
        # UnicodeDecodeError: the bytes are not valid in `encoding`.
        # LookupError: `encoding` is not a codec name Python knows.
        print(f"Error: {e}")
    else:
        # Only reached when the read succeeded; keeps the try body minimal.
        print(f"Successfully read the file: {file_name}")
        return df
    return None

In [4]:
# Example usage: load the chemical-formulae CSV export and preview its first rows.
file_path = "Data/Chemical_Formulae_Wiki_06042025(Sheet1).csv"
dataframe = read_csv_to_dataframe(file_path)
# read_csv_to_dataframe returns None when the read fails, so guard the preview.
if dataframe is not None:
    print(dataframe.head())

Successfully read the file: Data/Chemical_Formulae_Wiki_06042025(Sheet1).csv
  chemical formulae                     title  CAS number
0             Ac2O3       actinium(III) oxide  12002-61-8
1             AgBF4  Silver tetrafluoroborate  14104-20-2
2              AgBr            silver bromide   7785-23-1
3             AgBrO        silver hypobromite         NaN
4            AgBrO2            silver bromite         NaN


In [5]:
# Export: re-load the source CSV and save it as a tab-separated text file.
file_path = "Data/Chemical_Formulae_Wiki_06042025(Sheet1).csv"
dataframe = read_csv_to_dataframe(file_path)

# The reader signals failure with None; skip the export in that case.
if dataframe is not None:
    # One line per DataFrame row, tab-delimited, with no header row and
    # no index column.
    output_file = "Data/Chemical_Formulae_Wiki_06042025.txt"
    dataframe.to_csv(output_file, sep='\t', header=False, index=False)
    print(f"Data has been written to {output_file}")

Successfully read the file: Data/Chemical_Formulae_Wiki_06042025(Sheet1).csv
Data has been written to Data/Chemical_Formulae_Wiki_06042025.txt
