In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF


class FileSaver:
    """
    Saves data to disk as CSV, PDF, or PNG files inside one base directory.

    Every save method reports success or failure through ``print_log`` and
    never re-raises, so a failed save does not interrupt the caller.

    NOTE(review): ``print_log`` is not imported in this cell; it is presumably
    made available by ``from print_log import print_log`` elsewhere -- confirm
    it is in scope before any of these methods run.
    """

    def __init__(self, file_path):
        """
        Initializes the FileSaver object with the base file path where files will be saved.

        Args:
            file_path: The directory path where files will be saved.
        """
        self.file_path = file_path

    def save_as_csv(self, data, file_name):
        """
        Saves the data as a CSV file (without the index column).

        Args:
            data: The DataFrame to be saved as CSV.
            file_name: The name of the file (without extension).
        """
        try:
            full_path = os.path.join(self.file_path, f"{file_name}.csv")
            data.to_csv(full_path, index=False)
            print_log(f"File saved as {file_name}.csv")
        except Exception as e:
            print_log(f"An error occurred while saving CSV: {e}")

    @staticmethod
    def _pdf_lines(data):
        """Returns the sequence of text lines that should be written to the PDF."""
        if isinstance(data, pd.DataFrame):
            # Iterating a DataFrame yields only its column labels, so render
            # the header and every row as text instead.
            return data.to_string(index=False).splitlines()
        if isinstance(data, str):
            # Iterating a string would emit one character per PDF line.
            return data.splitlines()
        return data

    def save_as_pdf(self, data, file_name):
        """
        Saves the data as a PDF file, with each line of the data as text.

        Args:
            data: The data to be saved as a PDF file. A DataFrame is written
                row by row, a string line by line, and any other iterable one
                item per line.
            file_name: The name of the file (without extension).
        """
        try:
            full_path = os.path.join(self.file_path, f"{file_name}.pdf")
            pdf = FPDF()
            pdf.add_page()
            # "Times" is the built-in core font family. "Times New Roman" is
            # not registered with FPDF and raised "Undefined font", which made
            # every PDF export fail.
            pdf.set_font("Times", size=12)

            for line in self._pdf_lines(data):
                pdf.cell(200, 10, txt=str(line), ln=True, align='L')

            pdf.output(full_path)
            print_log(f"File saved as {file_name}.pdf")
        except Exception as e:
            print_log(f"An error occurred while saving PDF: {e}")

    def save_as_png(self, data, file_name):
        """
        Saves the data as a PNG file, assuming the data can be plotted.

        Args:
            data: The data to be plotted and saved as a PNG file.
            file_name: The name of the file (without extension).
        """
        try:
            full_path = os.path.join(self.file_path, f"{file_name}.png")
            fig = plt.figure(figsize=(6, 4))
            try:
                plt.plot(data)
                plt.savefig(full_path)
            finally:
                # Close the figure even when plotting or saving fails so that
                # repeated failures do not accumulate open figures.
                plt.close(fig)
            print_log(f"File saved as {file_name}.png")
        except Exception as e:
            print_log(f"An error occurred while saving PNG: {e}")

    def save_file(self, data, file_name, file_type):
        """
        Saves the data in the specified format (csv, pdf, png).

        Args:
            data: The data to be saved (expects different formats based on file type).
            file_name: The name of the file (without extension).
            file_type: The type of file ('csv', 'pdf', or 'png'); matched
                case-insensitively, ignoring surrounding whitespace.
        """
        savers = {
            'csv': self.save_as_csv,
            'pdf': self.save_as_pdf,
            'png': self.save_as_png,
        }
        saver = savers.get(str(file_type).strip().lower())
        if saver is None:
            print_log(f"Unsupported file type: {file_type}. Please choose 'csv', 'pdf', or 'png'.")
            return
        saver(data, file_name)



In [3]:
import os  # To check if the directory exists
import pandas as pd
from uploadfile import FileUpload
from data_visualizer import plot_data
from data_cleaner import DataCleaner
from print_log import print_log
from save_file import FileSaver

class DataReview:
    """
    Loads a data file, cleans it, and saves the cleaned result as CSV.

    Loading is delegated to ``FileUpload.stage_data``, cleaning to
    ``DataCleaner``, and saving to ``FileSaver``.
    """

    def __init__(self, file_path, output_dir=None):
        """
        Initializes the DataReview object.

        Args:
            file_path (str): The full path to the Excel file to be processed.
            output_dir (str, optional): Directory where the cleaned data is
                saved. When omitted, a "reviewed_datasets" directory next to
                the folder that contains ``file_path`` is used.
        """
        self.file_path = file_path
        self.output_dir = output_dir
        self.file_upload = FileUpload()  # Create an instance of FileUpload

    def check_directory(self):
        """
        Checks if the directory of the provided file path exists.

        Returns:
            bool: True if the directory exists, False otherwise.
        """
        # dirname() returns "" for a bare filename; that means the current
        # directory, which must not be reported as missing.
        directory = os.path.dirname(self.file_path) or os.curdir
        if not os.path.isdir(directory):
            print(f"Directory {directory} does not exist.")
            return False
        print(f"Directory {directory} exists.")
        return True

    def _resolve_output_dir(self):
        """Returns the directory in which the cleaned data should be saved."""
        if self.output_dir is not None:
            return self.output_dir
        # Derive the location from the input file rather than from a module
        # global, so the class also works when it is not run as a script.
        source_dir = os.path.dirname(os.path.abspath(self.file_path))
        return os.path.join(os.path.dirname(source_dir), "reviewed_datasets")

    def run_app(self):
        """
        Runs the main application logic: load, clean, and save the data.

        A load result of None is treated as a loading failure. Any exception
        raised while cleaning or saving is reported through ``print_log``
        instead of being propagated.
        """
        print_log("Starting Operation...")

        # Load data using the FileUpload class
        data = self.file_upload.stage_data(self.file_path)

        if data is None:
            print("Data loading failed, exiting application.")
            return

        print("Data processing can continue...")
        try:
            # Step 1: Initialize the DataCleaner object and clean the data
            cleaner = DataCleaner(data)
            cleaner.missing_values().fill_missing_values().clean_data()
            cleaned_data = cleaner.get_cleaned_data()

            # Step 2: Initialize the FileSaver object with the saving directory
            saved_file_directory = self._resolve_output_dir()
            os.makedirs(saved_file_directory, exist_ok=True)  # Ensure the directory exists
            file_saver = FileSaver(saved_file_directory)

            # Step 3: Save the cleaned data as CSV. The same saver also accepts
            # "pdf" and "png" as the file type if other formats are required.
            file_saver.save_file(cleaned_data, "Uk_Local_Housing_Data", "csv")

            # Step 4 (optional): call plot_data(cleaned_data) here to
            # visualize the cleaned data.

        except Exception as e:
            print_log(f"Operation Failed: {e}")

if __name__ == "__main__":
    try:
        # Root data directory. Set the HOUSING_DATA_DIR environment variable to
        # run on another machine; the default keeps the original location.
        directory_path = os.environ.get(
            "HOUSING_DATA_DIR",
            "/Users/mac/Desktop/n-dev/data/housing_datasets/housing_data",
        )
        uk_housing_file_path = os.path.join(
            directory_path,
            "uk_housing_datasets",
            "uk_local_housing_project_started",
            "UK_local_authority_housing_data.xlsx",
        )

        # Create an instance of DataReview
        data_review = DataReview(uk_housing_file_path)

        # Only run the pipeline when the input file's directory exists
        if data_review.check_directory():
            data_review.run_app()  # Call the main application logic
    except Exception as e:
        print(f"Operation failed: {e}")


ModuleNotFoundError: No module named 'uploadfile'