In [1]:
!pip install openpyxl
!pip install openpyxl
!pip install requests
!pip install pandas-gbq --user



In [4]:
import pandas as pd
import numpy as np
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
from typing import Sequence, Union



def batch_prediction_job(
    project: str,
    location: str,
    model_resource_name: str,
    job_display_name: str,
    gcs_source: Union[str, Sequence[str]],
    gcs_destination: str,
    sync: bool = True,
):
    """Start a batch prediction on an existing model and wait for it.

    Args:
        project: Project identifier handed to ``aiplatform.init``.
        location: Region handed to ``aiplatform.init``.
        model_resource_name: Identifier used to build ``aiplatform.Model``.
        job_display_name: Display name given to the new job.
        gcs_source: One GCS URI, or a sequence of them, with the instances.
        gcs_destination: GCS prefix passed as ``gcs_destination_prefix``.
        sync: Forwarded unchanged to ``Model.batch_predict``.

    Returns:
        The job object returned by ``Model.batch_predict``.
    """
    aiplatform.init(project=project, location=location)

    model = aiplatform.Model(model_resource_name)
    job = model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        sync=sync,
    )

    # wait() is always called, whatever the value of ``sync``.
    job.wait()

    # Report which job ran and the state it ended in.
    print(job.display_name)
    print(job.resource_name)
    print(job.state)
    return job


#####################################################################################################

"""This part contains all the parameters used by this program."""
    
    
class Param:
    """Container for every constant used by the batch-prediction script."""

    def __init__(self):
        # All GCS locations share this prefix; only the last folder differs.
        bucket_root = "gs://batch_predictions_sentiment/prediction_masive_classification_text/"
        processing_dir = bucket_root + "Prosecing_Masive_Data/"
        control_dir = bucket_root + "Main_Control_Batch/"

        # Workbook with the messages to classify.
        self.file_input = "data/sentiment.xlsx"

        # Request parameters.
        self.project_id = "284757810904"

        # Model identifiers; which one is used depends on the origin file.
        self.model_id_off_site = "7628818492812165120"
        self.model_id_on_site = "6264227805718904832"

        # JSONL control file read by the job, and the prefix it writes to.
        self.input_uri = control_dir + "Prediction_Main_Batch.jsonl"
        self.output_uri = processing_dir

        self.location = "us-central1"
        self.job_display_name = "new_job"

        # Massive-data parameters: where the per-message .txt files and the
        # control file are written.
        self.Processing_Batch = processing_dir
        self.Path_Control = control_dir

        # Text placed before and after the file number to form one line of
        # the control file.
        self.complement_ini = "{'content' :'" + processing_dir
        self.complement_end = ".txt', 'mimeType': 'text/plain'}"
            
            
            

        """This part contains the code that loads the data from the buckets;
            in this case, the data loaded is the sentiment data."""

class Input_Data:
    """Reader for the Excel workbook located at ``Route``."""

    def __init__(self, Route):
        # Location handed to pandas.read_excel by both readers below.
        self.Route = Route

    def Data_Sent_Filt(self):
        """Return only the ``Id_Model`` and ``Message`` columns of the workbook."""
        wanted_columns = ['Id_Model', 'Message']
        return pd.read_excel(self.Route).loc[:, wanted_columns]

    def Data_Sent_All(self):
        """Return the whole workbook as a DataFrame."""
        return pd.read_excel(self.Route)
    
    
        
    """This part contains the code in charge of converting
        and cleaning the data from the buckets."""

    
class Data_Frame:
    """Wrap a DataFrame and drop the rows that carry no message."""

    def __init__(self, Data):
        self.Data = Data

    def Transform(self):
        """Return the rows of ``Data`` whose ``Message`` value is not null.

        The original row labels are kept; the index is not renumbered.
        """
        frame = self.Data
        has_message = frame.Message.notnull()
        return frame[has_message]
    

    
    """This part contains the code that converts the sentiment
    data into a generator to make the request to the model."""

class Generator:
    """Write every message to its own numbered ``.txt`` file under ``Path``."""

    def __init__(self, Sentiment, Path):
        self.Sentiment = Sentiment
        self.Path = Path

    def Iterator(self):
        """Export the ``Message`` column, one file per row, and return it.

        Files are named ``<Path><n>.txt`` where ``n`` is the zero-based
        position of the message, not its original row label.

        Returns:
            A DataFrame with a single ``Sentiment`` column holding the
            messages, indexed 0..n-1.
        """
        messages = self.Sentiment['Message'].to_numpy().tolist()
        New_df = pd.DataFrame(messages, columns=['Sentiment'])

        for position, message in New_df['Sentiment'].items():
            # A one-cell frame written without header or index leaves just
            # the message in the file.
            # NOTE(review): to_csv applies CSV quoting, so a message that
            # contains a comma or a quote is written wrapped in quotes;
            # confirm that is acceptable for the model input.
            single_cell = pd.DataFrame([message], columns=[''])
            single_cell.to_csv(f"{self.Path}{position}.txt", index=False, header=False)

        return New_df
    

        


"""This code generates the paths used to get the batch prediction."""
class Generate_path:
    """Write the control file that lists one entry per message."""

    def __init__(self, Senti_df, Path):
        self.Senti_df = Senti_df
        self.Path = Path

    def paths_jsl(self, a, b):
        """Build one line per row of ``Senti_df`` and save them to a file.

        Each line is ``a + <row position> + b``. The lines are written to
        ``<Path>Prediction_Main_Batch.jsonl``.

        Args:
            a: Text placed before the row position.
            b: Text placed after the row position.

        Returns:
            A DataFrame with the generated lines in ``Paths_off_Sentiment``.
        """
        row_count = len(self.Senti_df)
        print(range(row_count))

        entries = [a + str(number) + b for number in range(row_count)]

        New_df = pd.DataFrame(entries, columns=['Paths_off_Sentiment'])
        target = f'{self.Path}Prediction_Main_Batch.jsonl'
        # quotechar=' ' replaces the default '"' quote character; presumably
        # this keeps double quotes out of lines that contain commas -- TODO
        # confirm the resulting file is what the prediction service expects.
        New_df.to_csv(target, index=False, header=False, quotechar=' ')
        return New_df
        
        
            

        


        
"""This is the main code that starts the program; in this part we call the different objects."""

def run():
    """Export the messages, write the control file and launch the prediction.

    Steps, in order: load the constants, read the workbook, drop rows
    without a message, write one ``.txt`` file per message, write the JSONL
    control file, then start the batch prediction job.
    """
    # Load the constants used by every later step
    settings = Param()
    print("The Params is: Ok")

    # Read the Id_Model / Message columns from the workbook
    raw_messages = Input_Data(settings.file_input).Data_Sent_Filt()
    print("The Data is: Ok")

    # Keep only the rows that carry a message
    messages = Data_Frame(raw_messages).Transform()
    print("The sentiment export to list is: Ok")

    # One .txt file per message
    Generator(messages, settings.Processing_Batch).Iterator()
    print ("The sentiment already exported to txt : Ok ")

    # Control file listing the path of every message file
    Generate_path(messages, settings.Path_Control).paths_jsl(
        settings.complement_ini,
        settings.complement_end,
    )
    print ("The Paths  prediction exported : OK " )

    # Batch prediction over the exported messages
    batch_prediction_job(
        settings.project_id,
        settings.location,
        # Change depending on the model: off-site or on-site
        settings.model_id_on_site,
        settings.job_display_name,
        settings.input_uri,
        settings.output_uri,
    )

    print("Prediction Batch: ok")
    
    
    

    
# Start the pipeline only when this code is executed directly, not on import.
if __name__ == '__main__':
    run()

The Params is: Ok
The Data is: Ok
The sentiment export to list is: Ok
The sentiment already exported to txt : Ok 
range(0, 2541)
The Paths  prediction exported : OK 
Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/284757810904/locations/us-central1/batchPredictionJobs/7311304411161034752
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/284757810904/locations/us-central1/batchPredictionJobs/7311304411161034752')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/7311304411161034752?project=284757810904
BatchPredictionJob projects/284757810904/locations/us-central1/batchPredictionJobs/7311304411161034752 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/284757810904/locations/us-central1/batchPredictionJobs/7311304411161034752 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/284757810904/locations/us-central1

In [5]:

    
        
"""This object contains the code in charge of converting
        and cleaning the data from the buckets."""

    
class Data_Frame:
    """Hold a DataFrame and filter out the rows whose message is missing."""

    def __init__(self, Data):
        self.Data = Data

    def Transform(self):
        """Return the rows of ``Data`` with a non-null ``Message``.

        Row labels are preserved as they were in ``Data``.
        """
        message_present = self.Data.Message.notnull()
        return self.Data[message_present]
    
    
class generate_list:
    """Build the list of zero-padded file numbers to iterate over."""

    def create_list(self, files_user):
        """Return the numbers 1..``files_user`` as strings padded to two digits.

        For example ``3`` gives ``['01', '02', '03']``; numbers with more
        than two digits are returned without padding.
        """
        return [f"{number:02d}" for number in range(1, files_user + 1)]
    
    
    
"""This object contains the code that loads the data from the buckets;
        in this case, the data loaded is the sentiment data."""

class Input_Result:
    """Load one prediction output file as a comma-split table."""

    def __init__(self, Result):
        # Path or file-like object handed to pandas.read_csv.
        self.Result = Result

    def Data_Out(self):
        """Read ``Result`` with ``read_csv`` into 20 columns named '1'..'20'.

        The file is read without a header row and split on commas; columns
        with no data on a line are left empty.
        """
        column_names = [str(position) for position in range(1, 21)]
        return pd.read_csv(self.Result, names=column_names, header=None, sep=',')

    
    
    
"""With this object we count the files that the prediction job has produced before."""
    
    
def generator_0(
    count_files,
    prediction_dir='gs://batch_predictions_sentiment/prediction_masive_classification_text/Prosecing_Masive_Data/prediction-topic1_sss_onsite_single-2023-02-09T19:54:53.138392Z',
):
    """Yield columns '1' and '8' of every prediction output file.

    Args:
        count_files: Iterable of file numbers (for example ``'01'``); each
            one is zero-padded to five digits to form
            ``predictions_<number>.jsonl``.
        prediction_dir: Folder that holds the prediction files. It has to be
            changed for each batch prediction job; the default is the folder
            of the job this code was last run against.

    Yields:
        For each file, a DataFrame with the columns ``'1'`` and ``'8'`` of
        the table produced by ``Input_Result.Data_Out``.
    """
    for i in count_files:
        # Pad to five digits so the name stays 'predictions_00001.jsonl'
        # style even when there are 100 or more files.
        file_number = str(i).zfill(5)

        # Read one batch prediction file
        Results = Input_Result(f'{prediction_dir}/predictions_{file_number}.jsonl')
        Data_Result = Results.Data_Out()
        print(Data_Result.head())

        # Organize and separate.
        # Generally the off-site file is decomposed with columns 1-9 and the
        # on-site file with columns 1-8, but it is recommended to print the
        # files before exporting.
        # NOTE(review): the .jsonl file is read through read_csv, so the
        # column positions depend on how many commas each line contains --
        # verify '8' is still the right column for a new model.
        Data_Recl = Data_Result.loc[:, ['1', '8']]

        yield Data_Recl
    
    
"""With this object we extract the number of each sentiment."""
    

def generator_1(Data_Recl):
    """Yield a one-column DataFrame with the first number found in column '1'.

    Args:
        Data_Recl: DataFrame with a string column named ``'1'``.

    Yields:
        A single DataFrame, indexed like ``Data_Recl``, whose only column
        ``'1'`` holds the first integer or decimal number found in each
        value (as text), or a missing value when there is none.

    Raises:
        KeyError: If ``Data_Recl`` has no column ``'1'``.
    """
    # Only column '1' is returned, so only that column is parsed; the other
    # columns may hold anything, including non-string values.
    numbers = Data_Recl['1'].str.extract(r'(\d+(?:\.\d+)?)', expand=False)
    yield numbers.to_frame('1')
    
    
def generator_2(Data_Recl):
    """Yield a one-column DataFrame with the category named in column '8'.

    Args:
        Data_Recl: DataFrame with a string column named ``'8'``.

    Yields:
        A single DataFrame, indexed like ``Data_Recl``, whose only column
        ``'8'`` holds the first known category name found in each value, or
        a missing value when none is present.

    Raises:
        KeyError: If ``Data_Recl`` has no column ``'8'``.
    """
    # Category names to look for. This is the on-site list:
    #   on_site:  Institucional|Planes y servicios|Comunicación oficial|Menciones generales|Competidores
    # For an off-site model use instead:
    #   off_site: Comunicación oficial|Planes y servicios|Institucional|Acciones|Menciones generales|Noticias
    category_pattern = r'(Institucional|Planes y servicios|Comunicación oficial|Menciones generales|Competidores)'

    # Only column '8' is returned, so only that column is parsed.
    categories = Data_Recl['8'].str.extract(category_pattern, expand=False)
    yield categories.to_frame('8')
    

def cycle_1(data_g, a, row_max, df_final):
    """Accumulate parsed prediction rows until ``row_max`` rows are collected.

    Args:
        data_g: Iterator yielding DataFrames with columns ``'1'`` and ``'8'``.
        a: Row count to start from.
        row_max: Row count at which the loop stops.
        df_final: DataFrame the parsed rows are appended to.

    Returns:
        ``df_final`` extended with one row per parsed prediction; it is
        returned untouched when ``a`` already equals ``row_max``.

    Note:
        The loop stops only on an exact match with ``row_max``. If ``data_g``
        runs out first, the ``StopIteration`` raised by ``next`` propagates
        to the caller.
    """
    collected = df_final
    row_total = a

    while row_total != row_max:
        # Next prediction file
        chunk = next(data_g)

        # File number and category parsed from that file
        numbers = next(generator_1(chunk))
        labels = next(generator_2(chunk))

        # Pair them row by row and append to what was gathered so far
        paired = pd.merge(numbers, labels, left_index=True, right_index=True)
        collected = pd.concat([collected, paired], ignore_index=True)
        row_total = int(len(collected))

    return collected







def run(input_user=10):
    """Join the batch prediction results with the messages and export them.

    Args:
        input_user: Number of prediction output files present in the bucket.

    The result is written to ``data/clasification.xlsx`` with the predicted
    category in the ``Clasification_M`` column.
    """
    # Number of output files on the bucket
    print(f"El numero de archivos es {input_user}")

    # Zero-padded file numbers used to iterate over the prediction files
    count_files = generate_list().create_list(input_user)
    print("The params is : OK")

    # Extract the data.
    # NOTE(review): Input_Data has no Data_Sent method in this file;
    # Data_Sent_Filt is used because it is the reader the export step used --
    # switch to Data_Sent_All if every workbook column is wanted.
    Data = Input_Data('data/sentiment.xlsx').Data_Sent_Filt()
    print("The Data Model is: Ok")

    # Keep the rows that have a message and renumber them 0..n-1. The
    # message files were numbered by position after this same filter, so the
    # merge below must use positions, not the original row labels.
    Data_Clasification = Data_Frame(Data).Transform().reset_index(drop=True)
    print("The sentiment export to list is: Ok")

    # Concatenate the parsed predictions until every message has one
    row_max = len(Data_Clasification)
    print(row_max)
    df_final = cycle_1(generator_0(count_files), 0, row_max, pd.DataFrame())

    # Order and index the predictions by file number
    print("Data_frame is complete: OK")
    # Convert before sorting so the order is numeric rather than textual
    df_final['1'] = df_final['1'].astype(int)
    df_order = df_final.sort_values('1', ascending=True).set_index('1')
    key_df = pd.merge(Data_Clasification, df_order, left_index=True, right_index=True)

    # Prepare the columns for DATA PREP
    key_df = key_df.rename(columns={'8': 'Clasification_M'})

    # A CSV copy was previously written under
    # gs://batch_predictions_sentiment/prediction_masive_classification_text/Output_masive_prediction/on_site/
    # (On_site_last.csv); only the local workbook is produced now.
    key_df.to_excel("data/clasification.xlsx", index=False)

    print("The file is  : OK")
    #key_df
    
    
# Start the result-processing step only when this code is executed directly.
if __name__ == '__main__':
    run()

El numero de archivos es 10
The params is : OK
The Data Model is: Ok
The sentiment export to list is: Ok
2541
                                                   1                       2  \
0  {"instance":{"content":"gs://batch_predictions...  mimeType:"text/plain"}   
1  {"instance":{"content":"gs://batch_predictions...  mimeType:"text/plain"}   
2  {"instance":{"content":"gs://batch_predictions...  mimeType:"text/plain"}   
3  {"instance":{"content":"gs://batch_predictions...  mimeType:"text/plain"}   
4  {"instance":{"content":"gs://batch_predictions...  mimeType:"text/plain"}   

                                          3                    4  \
0   prediction:{"ids":["909201008414949376"  3215044017628643328   
1   prediction:{"ids":["909201008414949376"  3215044017628643328   
2  prediction:{"ids":["5520887026842337280"  3215044017628643328   
3  prediction:{"ids":["3215044017628643328"   909201008414949376   
4  prediction:{"ids":["3215044017628643328"  7826730036056031232   

