In [1]:
import pandas as pd
import os

def clean_book_data(file_path, save_path):
    """Clean a book-metadata CSV and write the result to ``save_path``.

    Steps performed, in order:

    1. Read ``file_path`` with ``pd.read_csv`` and print its first rows.
    2. Drop the 'Flickr URL', 'Edition Statement' and 'Corporate Contributors'
       columns when present (absent columns are ignored).
    3. Add a 'Publication_Year' column holding the first run of four digits
       found in the publication-date column.
    4. Strip surrounding whitespace from 'Author' and title-case it, leaving
       missing values untouched.
    5. Print the first rows of the cleaned frame and save it without the index.

    Args:
        file_path: Path of the CSV file to read.
        save_path: Path the cleaned CSV is written to.

    Returns:
        None. Any error, including a missing input file, is reported with
        ``print`` rather than raised.
    """
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"The file '{file_path}' was not found.")

        df = pd.read_csv(file_path)
        print("Original DataFrame:")
        print(df.head())

        df = df.drop(
            columns=['Flickr URL', 'Edition Statement', 'Corporate Contributors'],
            errors='ignore',
        ).reset_index(drop=True)

        # The dataset's header is spelled with spaces ('Date of Publication');
        # the underscore spelling is still accepted for backward compatibility.
        date_col = next(
            (
                name
                for name in ('Date of Publication', 'Date_of_Publication')
                if name in df.columns
            ),
            None,
        )
        if date_col is not None:
            # Cast to str first: read_csv yields a numeric column when every
            # date is a plain year, and numeric columns have no .str accessor.
            df['Publication_Year'] = (
                df[date_col].astype(str).str.extract(r'(\d{4})', expand=False)
            )

        if 'Author' in df.columns:
            authors = df['Author']
            # Keep missing authors missing instead of turning them into text;
            # the cast also covers an all-NaN column, which is read as float.
            df['Author'] = authors.where(
                authors.isna(),
                authors.astype(str).str.strip().str.title(),
            )

        print("Cleaned DataFrame:")
        print(df.head())

        df.to_csv(save_path, index=False)
        print(f"Cleaned DataFrame saved to '{save_path}'")

    except Exception as e:
        # Best-effort contract: report the problem instead of propagating it.
        print(f"An error occurred: {e}")

# Run the cleaning step on the British Library book dataset.
clean_book_data(
    file_path='BL-Flickr-Images-Book.csv',
    save_path='Cleaned-csv.csv',
)


Original DataFrame:
   Identifier             Edition Statement      Place of Publication  \
0         206                           NaN                    London   
1         216                           NaN  London; Virtue & Yorston   
2         218                           NaN                    London   
3         472                           NaN                    London   
4         480  A new edition, revised, etc.                    London   

  Date of Publication              Publisher  \
0         1879 [1878]       S. Tinsley & Co.   
1                1868           Virtue & Co.   
2                1869  Bradbury, Evans & Co.   
3                1851          James Darling   
4                1857   Wertheim & Macintosh   

                                               Title     Author  \
0                  Walter Forbes. [A novel.] By A. A      A. A.   
1  All for Greed. [A novel. The dedication signed...  A., A. A.   
2  Love the Avenger. By the author of “All for Gr..