In [4]:
import boto3
import datetime as dt
import json
import numpy as np
import pandas as pd
import snowflake.connector
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
pd.options.display.float_format = '{:,.2f}'.format

In [5]:
from abc import ABCMeta, abstractmethod

class Credentials(metaclass=ABCMeta):
    pass
    
    
class SSMPSCredentials(Credentials):
    def __init__(self, secretid: str):
        self._secretid = secretid
        self._secrets = {}
        
    def get_keys(self):
        """
        credential fetching 
        """
        _aws_sm_args = {'service_name': 'secretsmanager', 'region_name': 'us-east-1'}
        secrets_client = boto3.client(**_aws_sm_args)
        get_secret_value_response = secrets_client.get_secret_value(SecretId=self._secretid)
        return get_secret_value_response
    
    
class BaseConnector(metaclass=ABCMeta):
    @abstractmethod
    def connect(self):
        raise NotImplementedError
    

class SnowflakeConnector(BaseConnector):
    def __init__(self, credentials: Credentials):
        keys = credentials.get_keys()
        self._secrets = json.loads(keys.get('SecretString', "{}"))

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        ctx = snowflake.connector.connect(
            user=self._secrets['login_name'],
            password=self._secrets['login_password'],
            account=self._secrets['account'],
            warehouse=self._secrets['warehouse'],
            database=dbname,
            schema=schema
        )

        return ctx
    
## Credentials
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection 
conn=SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx=conn.connect("MAX_DEV","WORKSPACE")

def run_query(query):
    cursor = ctx.cursor()
    cursor.execute(query)
    df = pd.DataFrame(cursor.fetchall(), columns = [desc[0] for desc in cursor.description])
    df.columns= df.columns.str.lower()
    return df



In [9]:
def extract_first_three_words(text):
    words = text.split()
    if len(words) >= 2 and len(words) >= 4:
        word=' '.join(words[:2])
    elif len(words) >= 4 and len(words) <=6:
        word=' '.join(words[:3])
    elif len(words) >= 7:
        word=' '.join(words[:4])
    else:
        word=text
    return word

def remove_words_starting_with_b(text):
    return ' '.join(word for word in text.split() if not word.startswith('('))

df_raw = pd.read_csv('US CROI import.csv')
df = df_raw[(df_raw.TITLE_ID.isnull()) & (df_raw['Assigned Person']=='Jeni')] 

# Apply the function to create the 'search_title' column
df['SOURCE_TITLE_NAME'] = df['SOURCE_TITLE_NAME'].astype(str)
df['search_title'] = df['SOURCE_TITLE_NAME'].str.replace(', The', '', case=False)
df['search_title'] = df['search_title'].str.replace('S1', '', case=False)
df['search_title'] = df['search_title'].str.replace('S2', '', case=False)
df['search_title'] = df['search_title'].str.replace('S3', '', case=False)

df['search_title'] = df['search_title'].apply(extract_first_three_words)
df['search_title'] = df['search_title'].apply(remove_words_starting_with_b)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [22]:
df_list=[]
search_list = df.search_title.unique().tolist()


for var in search_list:
    try:
#         query = f"SET var = '{var}'"
#         cur.execute(query)

        title_query=  f'''select 
        distinct series_title_long
        ,rad.legacy_hbomax_series_viewable_id, rad.season_number, rad.brand_names, rad.licensor
        , release_year
        , content_type
        from    "INT_DAI_PROD_SHARE"."CONTENT_METADATA_GOLD"."REPORTING_ASSET_DIM_COMBINED" rad
        join    "INT_DAI_PROD_SHARE"."CONTENT_METADATA_GOLD"."REPORTING_ASSET_OFFERING_DIM_COMBINED" raod
        on      rad.legacy_hbomax_viewable_id = raod.legacy_hbomax_viewable_id
        where   rad.asset_type = 'FEATURE'
        and     (contains (rad.series_title_long, $${var}$$) or rad.series_title_long ilike $${var}$$) '''.format(var = var)
        
#         print(title_query)
    
#     run_query(title_query)

        df_r =  run_query(title_query)
        df_r['search_title'] = var
        df_list.append(df_r)
    except Exception as e: 
        print(title_query)
        print (e)
    
df_final = pd.concat(df_list)

In [23]:
df_final

Unnamed: 0,series_title_long,legacy_hbomax_series_viewable_id,season_number,brand_names,licensor,release_year,content_type,search_title
0,The Way,GYUwSCA0AisMJggEAAAAd,,"[\n ""Unbranded""\n]","[\n ""Indie Rights""\n]",2010,,THE WAY
0,Tu Me Manques,GYLbMdAu9qbpQtgEAAAIH,,"[\n ""HBO""\n]",[],2021,movie,TU ME MANQUES
0,Tacoma FD,GXbDskgCPD3epwwEAAAPe,2.0,"[\n ""Tru TV""\n]",[],2020,episode,Tacoma FD
1,Tacoma FD,GXbDskgCPD3epwwEAAAPe,3.0,"[\n ""Tru TV""\n]",[],2021,episode,Tacoma FD
2,Tacoma FD,GXbDskgCPD3epwwEAAAPe,1.0,"[\n ""Tru TV""\n]",[],2019,episode,Tacoma FD
0,Troll: The Tale of a Tail,GYqHHNgUmqjpJjwEAAAAX,,"[\n ""Cartoon Network""\n]","[\n ""Sagatoon AS""\n]",2018,,Tale of
1,A Tale of Love and Desire,GYtf0fABmooWwbwEAAAAD,,"[\n ""Unbranded""\n]","[\n ""Arte France Cinéma"",\n ""Blue Monday Pro...",2021,,Tale of
2,Legendary Nights: The Tale of Gatti-Ward,GVg8Q1Q0THMPCAiAKAAAI,,"[\n ""HBO""\n]",[],2013,movie,Tale of
3,Tale of Tales,GYYSV0ggDMqIhIAEAAAAb,,"[\n ""Unbranded""\n]","[\n ""Puglia Film Commission"",\n ""Ministero p...",2015,,Tale of
4,The Tale of Despereaux,GYiOBdgDkfitLUQEAAAAP,,"[\n ""Unbranded""\n]","[\n ""Larger Than Life Productions"",\n ""Unive...",2008,,Tale of
