In [77]:
"""
tool : 
  --> find_date_and_spectrum
  --> get_all_spectrum
  --> get_all_day
  --> extract_date_and_spectrum
  --> save_date_and_spectrum
  --> generate_plot
  --> fix_Meta
      --> fix_label
      --> fix_date
      --> fix_geomfeat
"""

'\ntool : \n  --> find_date_and_spectrum\n  --> get_all_spectrum\n  --> get_all_day\n  --> extract_date_and_spectrum\n  --> save_date_and_spectrum\n  --> generate_plot\n  --> fix_Meta\n      --> fix_label\n      --> fix_date\n      --> fix_geomfeat\n'

In [78]:
import numpy as np
import pandas as pd
class convert_data:
    """Convert a wide table with ``<day>_<spectrum>`` columns into dense arrays.

    For every row of ``df`` and every day found in the column names, a vector
    with one slot per known spectrum is built. Slots whose ``<day>_<spectrum>``
    column does not exist for that day are left as ``np.nan``.

    Attributes set by ``__init__``:
        df, class_column, start_date, geomfeat_column: the constructor
            arguments, stored as given (``class_column``, ``start_date`` and
            ``geomfeat_column`` are not read by the methods of this class).
        date_and_spectrum: result of ``extract_date_and_spectrum(df)``.
        all_spectrum: result of ``get_all_spectrum(date_and_spectrum)``.
        all_day: result of ``get_all_day(date_and_spectrum)``.
    """

    def __init__(self,
                 df : pd.DataFrame, 
                 class_column : str,
                 start_date : str,
                 geomfeat_columns : tuple) -> None:
        
        self.df = df
        self.class_column = class_column
        self.start_date = start_date
        # Attribute name kept singular for compatibility with existing users.
        self.geomfeat_column = geomfeat_columns
        
        self.date_and_spectrum = extract_date_and_spectrum(df)
        self.all_spectrum = get_all_spectrum(self.date_and_spectrum)
        self.all_day = get_all_day(self.date_and_spectrum)
        
        
    def fit(self, verbose : bool = False):
        """Build the day/spectrum array for every row of ``self.df``.

        Previously the ``return`` sat inside the row loop, so only the first
        row was ever converted (and an empty frame returned ``None``). All
        rows are now processed.

        Args:
            verbose: when True, print ``"<row> --> <day>"`` for each processed
                pair (this used to be printed unconditionally).

        Returns:
            ``np.ndarray`` of shape ``(n_rows, n_days, n_spectrums)``; the
            last axis follows the order of ``self.all_spectrum`` and the
            middle axis the order of ``self.all_day``.
        """
        samples = list()
        for index in range(self.df.shape[0]):
            per_day = list()
            for day in self.all_day:
                # A fresh NaN-filled template per day: _make_vector_day
                # writes into the dict it receives.
                spectrum_vector = self._create_vector_spectrum()
                per_day.append(self._make_vector_day(index, day, spectrum_vector))
                if verbose:
                    print(f"{index} --> {day}")
            samples.append(per_day)

        result = np.array(samples)
        if result.size == 0:
            # Keep a predictable 3-D shape even when there is nothing to convert.
            result = result.reshape(
                len(samples), len(self.all_day), len(self.all_spectrum)
            )
        return result
                    
    def _make_vector_day(self, 
                        index : int,
                        day : int,
                        spectrum_vector : dict):
        """Fill ``spectrum_vector`` with the values of row ``index`` for ``day``.

        Args:
            index: positional row number in ``self.df``.
            day: key into ``self.date_and_spectrum``.
            spectrum_vector: mapping spectrum -> value; updated in place for
                every spectrum that is available on ``day``.

        Returns:
            ``np.ndarray`` of the mapping's values in its iteration order.
        """
        # Materialise the row once instead of once per spectrum lookup.
        row = self.df.iloc[index]
        available = self.date_and_spectrum[day]
        for spectrum in spectrum_vector:
            if spectrum in available:
                spectrum_vector[spectrum] = row[f"{day}_{spectrum}"]

        return np.array(list(spectrum_vector.values()))
        
    def _create_vector_spectrum(self):
        """Return a new dict mapping every known spectrum to ``np.nan``."""
        return {spectrum: np.nan for spectrum in self.all_spectrum}


In [79]:
def extract_date_and_spectrum(df : pd.DataFrame, stopwords = None) -> dict:
    """Group the spectrum names found in ``df``'s column names by day.

    Every column that is not excluded is split with
    ``find_date_and_spectrum`` into a day prefix and a spectrum name; the day
    prefix is converted with ``int``.

    Args:
        df: table whose column names are inspected (values are not read).
        stopwords: optional iterable of column names to ignore. When omitted,
            the original built-in list is used ("CLASS", "Name",
            "DESCRIPTION", "Id", "RAND", "gridcode", "X", "Y").

    Returns:
        dict mapping ``int`` day -> ``set`` of spectrum names, with keys
        inserted in ascending day order.

    Raises:
        ValueError: if a column's day prefix is not an integer; the message
            names the offending column.
    """
    if stopwords is None:
        stopwords = (
            "CLASS",
            "Name",
            "DESCRIPTION",
            "Id",
            "RAND",
            "gridcode",
            "X",
            "Y",
        )
    excluded = set(stopwords)

    date_and_spectrum = dict()
    for column in df.columns:
        if column in excluded:
            continue

        day_text, spectrum = find_date_and_spectrum(column)
        try:
            day = int(day_text)
        except ValueError as error:
            # Same exception type as before, but now it says which column is wrong.
            raise ValueError(
                f"column {column!r} does not start with an integer day prefix"
            ) from error

        date_and_spectrum.setdefault(day, set()).add(spectrum)

    return dict(sorted(date_and_spectrum.items(), key=lambda item : item[0]))

In [80]:
def find_date_and_spectrum(text : str) -> list:
    """Split ``text`` at its first underscore.

    Args:
        text: a name of the form ``<prefix>_<rest>``; only the first ``_``
            acts as the separator, so ``<rest>`` may contain underscores.

    Returns:
        ``[prefix, rest]`` as a two-element list of strings.

    Raises:
        ValueError: if ``text`` contains no underscore. (The previous manual
            scan failed here with an unrelated ``TypeError`` from
            ``None + 1``.)
    """
    prefix, separator, rest = text.partition("_")
    if not separator:
        raise ValueError(f"expected a name like '<day>_<spectrum>', got {text!r}")
    return [prefix, rest]

In [81]:
def get_all_spectrum(date_and_spectrum : dict) -> list:
    """Return every distinct spectrum name across all days, sorted.

    The previous ``list(set(...))`` produced an arbitrary order that can
    differ between interpreter sessions for string names; sorting makes the
    order (and anything indexed by it) reproducible.

    Args:
        date_and_spectrum: mapping day -> iterable of spectrum names.
            Names must be mutually orderable (e.g. all strings).

    Returns:
        Sorted list of the unique spectrum names; empty list for empty input.
    """
    unique_spectrum = set()
    for spectrum in date_and_spectrum.values():
        unique_spectrum.update(spectrum)
    return sorted(unique_spectrum)

In [82]:
def get_all_day(date_and_spectrum : dict) -> list:
    """Return the keys (days) of ``date_and_spectrum`` as a list, in the mapping's own order."""
    return [day for day in date_and_spectrum]

In [83]:
def save_date_and_spectrum(date_and_spectrum : dict,
                           path_destination : str = "None") -> None:
    """Write a text summary of the spectrums available per day to ``info.txt``.

    One line ``day : <day> - spectrums : <name>-<name>-...`` is written per
    day (each name is followed by ``-``, as before), then a final line with
    the number of unique spectrums.

    Args:
        date_and_spectrum: mapping day -> iterable of spectrum names.
        path_destination: directory that receives ``info.txt``. The string
            ``"None"`` (the default) or the value ``None`` writes to the
            current working directory.
    """
    import os

    if path_destination is None or path_destination == "None":
        path = "info.txt"
    else:
        path = os.path.join(path_destination, "info.txt")

    all_spectrum = get_all_spectrum(date_and_spectrum)

    # The context manager closes the file even if a write fails; the explicit
    # encoding keeps the output independent of the platform default.
    with open(path, "w", encoding="utf-8") as file:
        for day, spectrum in date_and_spectrum.items():
            spectrums = "".join(f"{name}-" for name in spectrum)
            file.write(f"day : {day} - spectrums : {spectrums}" + "\n")

        # An invisible zero-width character that sat after the colon in this
        # message has been removed.
        file.write(f"number of unique spectrums are : {len(all_spectrum)}")

In [84]:
def generate_plot(date_and_spectrum : dict,
                  path_destination : str = "None",
                  last_n : int = 20) -> None:
    """Save a bar chart of how many spectrums are missing on each day.

    For each day the bar height is ``len(all unique spectrums) - len(spectrums
    of that day)``. Only the last ``last_n`` days (in the mapping's order)
    are drawn.

    Fixes over the previous version: ``path_destination`` was read but never
    defined as a parameter, the custom-directory branch never called
    ``savefig``, and the final message reported the wrong path.

    NOTE(review): relies on module-level ``plt`` (matplotlib.pyplot) and
    ``sns`` (seaborn), exactly as the original did; their imports are not in
    the cells shown here - confirm they are imported earlier in the notebook.

    Args:
        date_and_spectrum: mapping day -> collection of spectrum names.
        path_destination: directory that receives ``plot.png``. ``"None"``
            (default) or ``None`` saves to the current working directory.
        last_n: number of trailing days to plot; must be positive.

    Raises:
        ValueError: if ``last_n`` is not positive.
    """
    import os

    if last_n <= 0:
        # A slice of [-0:] would silently select every day.
        raise ValueError("last_n must be a positive integer")

    all_spectrum = get_all_spectrum(date_and_spectrum)

    missing_per_day = {
        day: len(all_spectrum) - len(spectrum)
        for day, spectrum in date_and_spectrum.items()
    }
    days = list(missing_per_day.keys())[-last_n : ]
    missing = list(missing_per_day.values())[-last_n : ]

    plt.figure(figsize = (30 , 10))
    plots = sns.barplot(x = days, y = missing, color='black')
    plots.set_xticklabels(plots.get_xticklabels(),rotation = 90)
    # Write each bar's value just above it.
    for bar in plots.patches:
        plots.annotate(format(bar.get_height(), '.0f'),
                        (bar.get_x() + bar.get_width() / 2,
                            bar.get_height()), ha='center', va='center',
                        size=15, xytext=(0, 8),
                        textcoords='offset points')

    if path_destination is None or path_destination == "None":
        path = "plot.png"
    else:
        path = os.path.join(path_destination, "plot.png")
    plt.savefig(path)

    print(f"saved plot for date and spectrums on your directory : {path}")

In [95]:
import pandas as pd

def fix_label(df: pd.DataFrame, class_column: str):
    """Map each row position to its class label.

    The labels are read straight from the class column. The previous
    ``df.iloc[index][class_column]`` built a full row Series for every
    label, which is slow and, for all-numeric frames, upcasts integer labels
    to float. ``tolist()`` also yields plain Python scalars rather than
    numpy scalars.

    Args:
        df: source table.
        class_column: name of the column holding the class labels.

    Returns:
        ``{"label_<k>class": {"0": label0, "1": label1, ...}}`` where ``k``
        is the number of distinct values in ``class_column`` and the inner
        keys are row positions as strings.
    """
    class_values = df[class_column]
    labels = {
        str(position): label
        for position, label in enumerate(class_values.tolist())
    }
    return {f"label_{len(class_values.unique())}class": labels}


In [91]:
def fix_geomfeat(df : pd.DataFrame, 
                 class_column : str,
                 geomfeat_columns : tuple):
    """Map each row position to its pair of geometric feature values.

    The two columns are read directly instead of materialising the whole row
    twice per record via ``df.iloc[index][...]``; values come back as plain
    Python scalars with their column's own type.

    Args:
        df: source table.
        class_column: unused; kept so existing calls keep working.
        geomfeat_columns: column names; only the first two entries are used.

    Returns:
        ``{"0": [first, second], "1": [...], ...}`` keyed by row position as
        a string.
    """
    first_values = df[geomfeat_columns[0]].tolist()
    second_values = df[geomfeat_columns[1]].tolist()
    return {
        str(position): [first, second]
        for position, (first, second) in enumerate(zip(first_values, second_values))
    }
    

In [96]:
import os

# Define the folder name. The default is a machine-specific absolute path;
# set the CONVERT_DATA_OUTPUT_DIR environment variable to redirect the output
# on another machine without editing this cell.
folder_name = os.environ.get("CONVERT_DATA_OUTPUT_DIR", "/home/reza/Desktop/test1")

# Create the folder (including missing parents); an existing folder is not an error
os.makedirs(folder_name, exist_ok=True)

# Get the absolute path of the folder
folder_path = os.path.abspath(folder_name)

print("Folder created:", folder_path)

Folder created: /home/reza/Desktop/test1


In [97]:
import json

# Sample payload used to demonstrate saving a dictionary as JSON
data = dict(name="John", age=30, city="New York")

# Destination file, relative to the current working directory
file_path = "data.json"

# Serialise with a 4-space indent, then write the text in a single call
with open(file_path, "w") as json_file:
    json_file.write(json.dumps(data, indent=4))

print(f"Dictionary saved to {file_path}")


Dictionary saved to data.json


In [51]:
def 

array([nan, nan])