In [3]:
!pip freeze > requeriments.txt

In [18]:
from google.cloud import storage
import pandas as pd
import numpy as np
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
from typing import Sequence, Union



""" This function performs a batch prediction job using the specified AI Platform model,
    source and destination. Returns: output_files (List[str]): the names of the files in
    the GCS destination location that contain the prediction results. """


def batch_prediction_job(
    project: str,
    location: str,
    model_resource_name: str,
    job_display_name: str,
    gcs_source: Union[str, Sequence[str]],
    gcs_destination: str,
    sync: bool = True,
    ):
    """Submit a batch prediction job and list the objects under its destination.

    Args:
        project: Google Cloud project passed to ``aiplatform.init`` and to the
            storage client.
        location: Region passed to ``aiplatform.init``.
        model_resource_name: Identifier handed to ``aiplatform.Model``.
        job_display_name: Display name given to the batch prediction job.
        gcs_source: One ``gs://`` URI, or a sequence of them, with the
            instances to predict.
        gcs_destination: ``gs://bucket/prefix`` URI used as the output prefix.
        sync: Forwarded unchanged to ``Model.batch_predict``.

    Returns:
        The names (paths relative to the bucket) of every blob found under
        the destination prefix. The listing is by prefix only, so it also
        includes objects that were already there before this job ran.

    Raises:
        ValueError: If ``gcs_destination`` is not a ``gs://`` URI that names a
            bucket.
    """
    # Split the destination into bucket and prefix *before* submitting the
    # job, so a malformed URI fails fast instead of after the prediction ran.
    scheme = "gs://"
    if not gcs_destination.startswith(scheme):
        raise ValueError(
            f"gcs_destination must start with {scheme!r}: {gcs_destination!r}"
        )
    bucket_name, _, blob_prefix = gcs_destination[len(scheme):].partition("/")
    if not bucket_name:
        raise ValueError(
            f"gcs_destination does not name a bucket: {gcs_destination!r}"
        )

    # Initialize the AI Platform client for the given project and location
    aiplatform.init(project=project, location=location)

    # Get the AI Platform model to use for prediction
    my_model = aiplatform.Model(model_resource_name)

    # Submit the batch prediction job using the model, source, and destination.
    # The returned job object is not needed afterwards, so it is not bound
    # (the original bound it to a local that shadowed this function's name).
    # NOTE(review): with sync=False the call presumably returns before the job
    # has finished, so the listing below may be incomplete - confirm.
    my_model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        sync=sync,
    )

    # Get the names of the prediction results files in the GCS destination location
    client = storage.Client(project=project)
    bucket = client.bucket(bucket_name)
    return [blob.name for blob in bucket.list_blobs(prefix=blob_prefix)]


    
    
class Param:
    """Hard-coded configuration values used by the sentiment batch pipeline.

    Every attribute is a plain string set in ``__init__``; nothing is read
    from the environment or from arguments.
    """

    def __init__(self):
        # path to the input file
        # NOTE(review): the closing quote of this literal was missing in the
        # source (a SyntaxError); it has been restored right after ".xls".
        # Confirm the real extension (.xls vs .xlsx).
        self.file_input = 'gs://data_sentiment_v5/SSS/sss_onsite/on_site_2023/SSS DB Power BI - Sentimiento OnSite  1-5 Febrero 2023.xls'
        # project ID
        self.project_id = "284757810904"
        # model ID for off-site sentiment analysis
        self.model_id_off_site = "5835259941211865088"
        # model ID for on-site sentiment analysis
        self.model_id_on_site = "3580645377759510528"
        # input URI for batch predictions
        self.input_uri = "gs://batch_predictions_sentiment/Prediction_Masive_Sentiment/Main_Control_Batch/Prediction_Main_Batch.jsonl"
        # output URI for batch predictions
        self.output_uri = "gs://batch_predictions_sentiment/Prediction_Masive_Sentiment/Output_batch/"
        # location for cloud services
        self.location = "us-central1"
        # display name for the batch prediction job
        self.job_display_name = "new_job"
        # path for processing batch data
        self.Processing_Batch = 'gs://batch_predictions_sentiment/Prediction_Masive_Sentiment/Prosecing_Masive_Data/'
        # path for main control batch data
        self.Path_Control = 'gs://batch_predictions_sentiment/Prediction_Masive_Sentiment/Main_Control_Batch/'
        # initial string for constructing the content URI
        # (same value as before; the backslashes before the single quotes
        # were redundant inside a double-quoted literal)
        self.complement_ini = "{'content' :'gs://batch_predictions_sentiment/Prediction_Masive_Sentiment/Prosecing_Masive_Data/"
        # end string for constructing the content URI
        self.complement_end = ".txt', 'mimeType': 'text/plain'}"
                               


class Input_Data:
    """Reads the input spreadsheet found at ``Route`` with ``pandas.read_excel``."""

    def __init__(self, Route):
        # Route (str): The path to the input file.
        self.Route = Route

    def Data_Sent_Filt(self):
        """Read the input file and keep only the "Id_Model" and "Message" columns.

        Returns:
            pandas.DataFrame: A DataFrame containing the columns "Id_Model"
            and "Message", with the row index produced by ``read_excel``.
        """
        # The docstring above used to be indented at class level, which made
        # the whole file fail with an IndentationError.
        df = pd.read_excel(self.Route)
        Data = df.loc[:, ['Id_Model', 'Message']]
        return Data

    def Data_Sent_All(self):
        """Read the input file and return every column.

        Returns:
            pandas.DataFrame: The full content returned by ``read_excel``.
        """
        df = pd.read_excel(self.Route)
        return df

    
"""This object holds the code in charge of converting
        and cleaning the data from the buckets"""

    
class Data_Frame:
    """Wraps a DataFrame and filters out the rows that have no message."""

    def __init__(self, Data):
        # Data: DataFrame that must contain a "Message" column.
        self.Data = Data

    def Transform(self):
        """Return the rows of ``Data`` whose "Message" value is not null.

        The original row index is kept; it is not renumbered.
        """
        has_message = self.Data.Message.notnull()
        return self.Data[has_message]
    
    
"""This part holds the code that writes each sentiment message
    to its own file in order to make the request to the model."""

class Generator:
    """Writes every message of a DataFrame to its own numbered ``.txt`` file."""

    def __init__(self, Sentiment, Path):
        # Sentiment: DataFrame with a "Message" column.
        # Path: string prefix put in front of "<position>.txt" for each file.
        self.Sentiment = Sentiment
        self.Path = Path

    def Iterator(self):
        """Export each message to ``{Path}{position}.txt`` and return them all.

        Files are numbered by position (0, 1, 2, ...) in the "Message"
        column, not by the original row labels. Each file is written through
        ``DataFrame.to_csv`` without header or index.

        Returns:
            pandas.DataFrame: One "Sentiment" column holding the messages,
            indexed 0..n-1.
        """
        messages = self.Sentiment['Message'].to_numpy().tolist()
        message_frame = pd.DataFrame(messages, columns=['Sentiment'])

        for position, text in enumerate(message_frame['Sentiment']):
            # A one-cell frame is used so the text goes through to_csv.
            single_cell = pd.DataFrame([text], columns=[''])
            single_cell.to_csv(f"{self.Path}{position}.txt", index=False, header=False)

        return message_frame
    

        


"""With this code we generate the input paths used to get the batch prediction"""
class Generate_path:
    """Builds the batch-prediction input file, one generated line per data row."""

    def __init__(self, Senti_df, Path):
        # Senti_df: DataFrame whose row count decides how many lines are written.
        # Path: string prefix put in front of "Prediction_Main_Batch.jsonl".
        self.Senti_df = Senti_df
        self.Path = Path

    def paths_jsl(self, a, b):
        """Write ``a + <row number> + b`` for every row and return the lines.

        Args:
            a: Text placed before the row number.
            b: Text placed after the row number.

        Returns:
            pandas.DataFrame: One "Paths_off_Sentiment" column with the
            generated lines, also saved to
            ``{Path}Prediction_Main_Batch.jsonl`` without header or index.
        """
        row_count = len(self.Senti_df)

        print(range(row_count))

        path_lines = [a + str(number) + b for number in range(row_count)]

        path_frame = pd.DataFrame(path_lines, columns=['Paths_off_Sentiment'])
        # quotechar=' ' is kept exactly as in the original call.
        path_frame.to_csv(
            f'{self.Path}Prediction_Main_Batch.jsonl',
            index=False,
            header=False,
            quotechar=' ',
        )
        return path_frame
        
        
"""With this code we generate a list of zero-padded, two-digit number strings"""
class generate_list:
    """Helper that produces zero-padded counters as strings."""

    def create_list(self, files_user):
        """Return the numbers 1..``files_user`` formatted with ``{:02d}``.

        Args:
            files_user: Last number to include; values below 1 give an
                empty list.

        Returns:
            list of str: e.g. ``["01", "02", "03"]`` for ``files_user=3``.
        """
        return ["{:02d}".format(number) for number in range(1, files_user + 1)]
            


"""This object holds the code that loads the data from the buckets;
        in this case the data loaded is the sentiment prediction result"""

class Input_Result:
    """Loads one result file by splitting every line on ``/``."""

    def __init__(self, Result):
        # Result: path of the file handed to pandas.read_csv.
        self.Result = Result

    def Data_Out(self):
        """Read ``Result`` as a headerless, ``/``-separated table.

        Returns:
            pandas.DataFrame: Seven columns named "1" to "5", "Order" and
            "Sentiment", in that order.
        """
        column_names = ['1', '2', '3', '4', '5', 'Order', 'Sentiment']
        return pd.read_csv(self.Result, sep='/', header=None, names=column_names)

    
    
    
"""With this generator we read each file that the prediction job has produced"""
    
    
def generator_0 (count_files):
    """Yield the "Order" and "Sentiment" columns of each result file, lazily.

    Args:
        count_files: Iterable of file paths; each one is loaded through
            ``Input_Result(...).Data_Out()`` only when the generator reaches it.

    Yields:
        pandas.DataFrame: The two columns "Order" and "Sentiment" of one file.
    """
    for result_path in count_files:
        # Read batch prediction
        loaded = Input_Result(result_path).Data_Out()

        # organize and separate
        yield loaded.loc[:, ['Order', 'Sentiment']]
    
    
"""With this generator we extract the number contained in each column value"""
    
def generator_1(Data_Recl):
    """Yield one DataFrame holding the first number found in every cell.

    For each column of ``Data_Recl`` the first match of an integer or
    decimal number (``12`` or ``1.5``) is extracted from each string value.
    Extracted values stay strings; cells without a number become missing.

    Args:
        Data_Recl: DataFrame whose columns all hold strings (the ``.str``
            accessor is used on every column).

    Yields:
        pandas.DataFrame: Same column names as ``Data_Recl``, produced
        exactly once.
    """
    df_num = pd.DataFrame()
    for column in Data_Recl.columns:
        # Raw string: the previous non-raw '\d' is an invalid escape sequence
        # (a SyntaxWarning on Python 3.12+). expand=False returns a Series
        # for the single capture group, which is what a column expects.
        df_num[column] = Data_Recl[column].str.extract(
            r'(\d+(?:\.\d+)?)', expand=False
        )

    yield df_num


    
def cycle_1(primer_g,a,row_max,df_final):
    """Accumulate numeric prediction rows until ``row_max`` rows are collected.

    Each item taken from ``primer_g`` is passed through ``generator_1`` and
    appended to ``df_final`` (the index is renumbered on every append).

    Args:
        primer_g: Iterator of DataFrames, e.g. the one built by ``generator_0``.
        a: Number of rows already collected (the caller starts with 0).
        row_max: Number of rows expected in total.
        df_final: DataFrame the new rows are appended to.

    Returns:
        pandas.DataFrame: The accumulated rows. If ``primer_g`` runs out
        before ``row_max`` rows are reached, the partial result is returned
        and a warning is emitted.
    """
    import warnings

    # "<" instead of "!=": if the accumulated count ever jumps past row_max
    # the loop still terminates instead of draining the iterator and failing.
    while a < row_max:

        #First state of sentiment
        state_gene_1 = next(primer_g, None)
        if state_gene_1 is None:
            # Source exhausted: a bare next() used to raise StopIteration here.
            warnings.warn(
                f"prediction files exhausted after {a} rows; expected {row_max}"
            )
            break

        #Second state of sentiment
        state_gene_2 = next(generator_1(state_gene_1))

        #merge the data frame
        df_final = pd.concat([df_final, state_gene_2], ignore_index=True)
        a = len(df_final)

    return df_final
    
    
    
    
        
"""This is the main code that starts the program; in this part we call the different objects"""

def run():
    """Run the whole sentiment batch-prediction pipeline end to end.

    Steps, in order:
      1. Load the configuration (``Param``) and read the input spreadsheet.
      2. Keep the rows that have a message and write each message to its own
         ``.txt`` file (``Generator``), numbered by position.
      3. Write the batch input file listing those ``.txt`` paths
         (``Generate_path``).
      4. Submit the batch prediction job with the on-site model and collect
         the names of the objects under the output prefix.
      5. Read the prediction files, extract the file number ("Order") and the
         predicted value ("Sentiment"), and join them to the input rows.
      6. Save the joined table to ``data/sentiment.xlsx``.

    Progress is reported with ``print``. Nothing is returned.
    """
    import os

    #Call Params to find read data
    Params = Param()
    Route_m = Params.file_input
    print("The Params is: Ok")


    #Extract the Data Filt
    All_Data = Input_Data(Route_m)
    Data_1 = All_Data.Data_Sent_Filt()
    print("The Data is: Ok")


    #Extract the Sentiment
    Extract = Data_Frame(Data_1)
    Data_Sentiment = Extract.Transform()
    print("The sentiment export to list is: Ok")


    #Generate Data files txt (the returned frame is not needed here)
    Variable_S = Generator(Data_Sentiment, Params.Processing_Batch)
    Variable_S.Iterator()
    print ("The sentiment already exported to txt : Ok ")


    #Generate list with paths of sentiments (the returned frame is not needed here)
    list_sent = Generate_path(Data_Sentiment, Params.Path_Control)
    list_sent.paths_jsl(Params.complement_ini, Params.complement_end)
    print ("The Paths  prediction exported : OK " )


    #Batch prediction of sentiments
    output_files = batch_prediction_job(Params.project_id,
                         Params.location,
                         #change depend of the model offsite - onsite
                         Params.model_id_on_site,
                         Params.job_display_name,
                         Params.input_uri,
                         Params.output_uri)

    print("Prediction Batch: ok")


    #fix files output batch prediction
    # NOTE(review): the first listed object is dropped - presumably the
    # placeholder object of the output folder; confirm against the bucket.
    output_files.pop(0)
    # Blob names are relative to the bucket, so the "gs://<bucket>/" part is
    # rebuilt from the configured output URI instead of being typed again.
    string = "/".join(Params.output_uri.split("/", 3)[:3]) + "/"
    new_list = [string + str(x) for x in output_files]
    print('files of batch is : ok')


    #Extract the  ALL Data
    Data_2 = All_Data.Data_Sent_All()
    print("The Data Model is: Ok")


    #Extract the Sentiment
    Extract = Data_Frame(Data_2)
    # The .txt files were numbered by position among the rows that have a
    # message, and "Order" carries that number. Renumber the rows the same
    # way so the join below pairs each message with its own prediction even
    # when rows without a message were filtered out in between.
    Data_Sentiment = Extract.Transform().reset_index(drop=True)
    print("The sentiment export to list is: Ok")

    #concat and organize the data of sentiment
    df_final = pd.DataFrame()
    primer_g = generator_0(new_list)
    a = 0
    row_max = len(Data_Sentiment)
    print("number of registres is: " + str(row_max))
    df_final = cycle_1(primer_g,a,row_max,df_final)

    #Order and index of dataframe
    print("Data_frame is complete: OK")
    # Convert before sorting so the order is numeric, not alphabetical.
    df_final['Order'] = df_final['Order'].astype(int)
    df_order = df_final.sort_values('Order',ascending=True)
    df_order = df_order.set_index('Order')
    key_df  = pd.merge(Data_Sentiment, df_order, left_index=True, right_index=True)

    #Rename columns
    key_df.rename(columns = {'Sentiment_y':'Sentiment_M', 'Sentiment_x':'Sentiment_H'}, inplace = True)

    #Export file
    # Make sure the target folder exists so the export cannot fail after the
    # prediction job has already been paid for.
    os.makedirs("data", exist_ok=True)
    key_df.to_excel("data/sentiment.xlsx",index=False)
    print("The file is  : OK")
    
# Start the pipeline only when this file is executed directly.
if __name__ == '__main__':
    run()

The Params is: Ok
The Data Model is: Ok
The sentiment export to list is: Ok
117
gs://batch_predictions_sentiment/Prediction_Masive_Sentiment/Output_batch/prediction-bss_sss_onsite-2023-02-10T06:04:21.473696Z/predictions_00001.jsonl
First state is : OK
Second state is : OK
gs://batch_predictions_sentiment/Prediction_Masive_Sentiment/Output_batch/prediction-bss_sss_onsite-2023-02-10T06:04:21.473696Z/predictions_00002.jsonl
First state is : OK
Second state is : OK
Data_frame is complete: OK
The file is  : OK


Este código es un script en Python que permite realizar predicciones en masa con un modelo de Google Cloud AI Platform utilizando la API de batch prediction. La implementación se divide en diferentes clases y funciones que describen los diferentes componentes del código.

La función batch_prediction_job es la función principal del código que realiza la predicción en masa. Toma como parámetros el proyecto y la ubicación en Google Cloud, el nombre del recurso del modelo, el nombre del trabajo de predicción en masa, la fuente de datos y la ubicación de destino de los resultados, y devuelve los nombres de los archivos encontrados en la ubicación de destino.

La clase Param define los parámetros que se utilizan en el código, incluido el nombre de proyecto, los identificadores de modelo, la ubicación, la ruta de entrada y la ruta de salida, así como ciertos parámetros relacionados con la manipulación de datos masivos.

La clase Input_Data se utiliza para cargar los datos de los buckets de Google Cloud Storage y puede acceder a los datos de sentimiento en un dataframe utilizando los métodos Data_Sent_Filt y Data_Sent_All.

La clase Data_Frame se utiliza para limpiar y transformar los datos de sentimiento antes de ser enviados al modelo.

La clase Generator escribe cada mensaje de sentimiento en un archivo .txt individual, que luego se utiliza como entrada de la predicción en masa.

En resumen, este código permite la carga y manipulación de datos de sentimiento de un bucket de Google Cloud Storage y su posterior predicción en masa utilizando un modelo de Google Cloud AI Platform.