# In [4]:
import pandas as pd
import re
import numpy as np
import plotly.graph_objects as go
import plotly.offline as offline

class DataVisualizer:
    """
    A class for analyzing data from a CSV file and plotting it as a density map.

    This class provides methods to read a CSV file into a pandas DataFrame, select columns to be analyzed,
    convert the DataFrame to a numpy array, count the number of rows satisfying a condition for each column,
    pair those counts with the coordinates found in the column headers, and render the result as a map.

    Column headers are expected to carry a coordinate pair in parentheses, longitude first,
    e.g. "(12.5 41.9)". Columns whose header has no such pair are left out of the analysis.
    """

    # Comparison operators accepted as `condition`. The condition text typically comes straight
    # from user input, so it is looked up in this fixed table instead of being passed to eval().
    _COMPARISONS = {
        '>': np.greater,
        '>=': np.greater_equal,
        '<': np.less,
        '<=': np.less_equal,
        '==': np.equal,
        '!=': np.not_equal,
    }

    # First parenthesised group of a column header.
    _COORDINATE_PATTERN = re.compile(r'\((.*?)\)')

    def __init__(self, file_path, condition=None, threshold=None):
        """
        Initialize the DataVisualizer object.

        Parameters:
        - file_path (str): The path to the CSV file.
        - condition (str, optional): Comparison operator ('>', '>=', '<', '<=', '==', '!=').
          May also be assigned later through the `condition` attribute.
        - threshold (int, float, or str, optional): The value each cell is compared against.
          May also be assigned later through the `threshold` attribute.
        """
        self.file_path = file_path
        self.condition = condition
        self.threshold = threshold
        # Filled in by extract_coordinates(); create_map() reads it.
        self.df_coordinates = None

    def read_csv(self):
        """
        Read the CSV file into a pandas DataFrame.

        Returns:
        - df (pandas.DataFrame): The DataFrame containing the data.
        """
        return pd.read_csv(self.file_path)

    def select_columns(self, df, column_number):
        """
        Select the columns to be analyzed from the DataFrame.

        Parameters:
        - df (pandas.DataFrame): The DataFrame containing the data.
        - column_number (int): The starting column number.

        Returns:
        - selected_columns (pandas.Index): The selected column names.
        """
        return df.columns[column_number:]

    def convert_to_numpy(self, df, selected_columns):
        """
        Convert the DataFrame to a numpy array.

        Parameters:
        - df (pandas.DataFrame): The DataFrame containing the data.
        - selected_columns (pandas.Index or list): The selected column names.

        Returns:
        - data (numpy.ndarray): The converted numpy array.
        """
        return df[selected_columns].to_numpy()

    def count_rows_satisfying_condition(self, data, condition, threshold):
        """
        Count the number of rows satisfying a condition for each column.

        Parameters:
        - data (numpy.ndarray): The data array.
        - condition (str): The comparison operator: one of '>', '>=', '<', '<=', '==', '!='.
          Surrounding whitespace is ignored.
        - threshold (int, float, or str): The threshold value for the condition. A string is
          converted with float().

        Returns:
        - counts (numpy.ndarray): The counts for each column(latitude longitude).

        Raises:
        - ValueError: If the condition or threshold is missing, the condition is not a supported
          operator, or a string threshold is not numeric.
        """
        if condition is None or threshold is None:
            raise ValueError("Both a condition and a threshold must be set before counting.")

        operator_key = str(condition).strip()
        compare = self._COMPARISONS.get(operator_key)
        if compare is None:
            supported = ", ".join(self._COMPARISONS)
            raise ValueError(
                f"Unsupported condition {condition!r}; expected one of: {supported}"
            )

        if isinstance(threshold, str):
            try:
                threshold = float(threshold)
            except ValueError:
                raise ValueError(f"Threshold {threshold!r} is not a number.") from None

        # SECURITY: this used to be eval(f"data {condition} {threshold}"), which executes
        # whatever text the caller supplies. The operator table keeps the comparison
        # behaviour for the supported operators without evaluating arbitrary code.
        condition_met = compare(np.asarray(data), threshold)
        return np.sum(condition_met, axis=0)

    def _parse_coordinates(self, column_name):
        """
        Extract a (longitude, latitude) pair from a column header.

        The first parenthesised group is used; its two numbers may be separated by
        whitespace and/or a comma.

        Returns:
        - (float, float) or None: The pair, or None when the header holds no usable pair.
        """
        match = self._COORDINATE_PATTERN.search(str(column_name))
        if match is None:
            return None

        parts = match.group(1).replace(',', ' ').split()
        if len(parts) != 2:
            return None

        try:
            longitude, latitude = (float(part) for part in parts)
        except ValueError:
            return None
        return longitude, latitude

    def extract_coordinates(self, column_number=1):
        """
        Extract latitude, longitude coordinates, and counts from the data.

        Only columns whose header contains a parseable coordinate pair are counted, so the
        coordinate lists and the counts always line up.

        Parameters:
        - column_number (int): Index of the first column to analyze. Defaults to 1, i.e. the
          first column of the file is skipped.

        Returns:
        - df_coordinates (pandas.DataFrame): The DataFrame containing the extracted coordinates and counts.

        Raises:
        - ValueError: If the condition/threshold are unset or invalid (see
          count_rows_satisfying_condition).
        """
        df = self.read_csv()

        latitudes, longitudes, usable_columns = [], [], []
        for column_name in self.select_columns(df, column_number):
            parsed = self._parse_coordinates(column_name)
            if parsed is None:
                # Header carries no coordinates: leave the column out entirely.
                continue
            longitude, latitude = parsed
            longitudes.append(longitude)
            latitudes.append(latitude)
            usable_columns.append(column_name)

        if usable_columns:
            data_array = self.convert_to_numpy(df, usable_columns)
            counts = self.count_rows_satisfying_condition(
                data_array, self.condition, self.threshold
            ).tolist()
        else:
            counts = []

        self.df_coordinates = pd.DataFrame(
            {'latitude': latitudes, 'longitude': longitudes, 'count': counts}
        )
        return self.df_coordinates

    def create_map(self, filename='heatmap.html', auto_open=True):
        """
        Create a map visualization based on the extracted coordinates and counts.

        This method filters out points with a count value of 0, creates a density map plot using the filtered coordinates
        and counts, and saves the plot as an HTML file. If extract_coordinates() has not been
        called yet, it is called first.

        Parameters:
        - filename (str): Path of the HTML file to write. Defaults to 'heatmap.html'.
        - auto_open (bool): Passed to plotly's offline.plot. Defaults to True.

        Returns:
        - None
        """
        coordinates = getattr(self, 'df_coordinates', None)
        if coordinates is None:
            coordinates = self.extract_coordinates()

        # Filter out points with count value of 0
        filtered_coordinates = coordinates[coordinates['count'] > 0]

        fig = go.Figure(data=go.Densitymapbox(
            lat=filtered_coordinates['latitude'],
            lon=filtered_coordinates['longitude'],
            z=filtered_coordinates['count'],
            radius=10,
            colorscale='Viridis',
            opacity=0.7))

        layout = {'mapbox_style': "open-street-map", 'mapbox_zoom': 10}
        if not filtered_coordinates.empty:
            # Centre on the mean position; with no points the mean would be NaN,
            # so the centre is left at plotly's default instead.
            layout['mapbox_center_lon'] = np.mean(filtered_coordinates['longitude'])
            layout['mapbox_center_lat'] = np.mean(filtered_coordinates['latitude'])
        fig.update_layout(**layout)

        # Save the plot as an HTML file
        offline.plot(fig, filename=filename, auto_open=auto_open)

# Collect the run parameters interactively
file_path = input("Enter the file path: ")
condition = input("Enter the condition: ")
threshold = float(input("Enter the threshold value: "))
# NOTE(review): column_number is read here but nothing below consumes it
column_number = int(input("Enter the column number corresponding to the 'sensor point': "))

# Build the visualizer and hand it the comparison settings
visualizer = DataVisualizer(file_path)
visualizer.condition, visualizer.threshold = condition, threshold

# Compute the per-location counts, then render the heatmap
coordinates = visualizer.extract_coordinates()
visualizer.create_map()


# Example session:
#   Enter the file path: Output.csv
#   Enter the condition: >
#   Enter the threshold value: 15
#   Enter the column number corresponding to the 'sensor point': 1
