In [1]:
import os
import requests
import feedparser
import re
import numpy
from datetime import datetime, timezone
from ctransformers import AutoModelForCausalLM
from transformers import AutoTokenizer, pipeline
from keybert.llm import TextGeneration
from keybert import KeyLLM
from sentence_transformers import SentenceTransformer
from transformers import T5Tokenizer, T5ForConditionalGeneration
import spacy
from keybert import KeyBERT
import torch

endpoint = "https://active-monitor-48.hasura.app/v1/graphql"
admin_key = "bAQuK7HSYvMAp6S6pnqXH0wQlyuKNUICzoW3jwecc27pwz6COLhE750s5YAec7Hz"
nlp = spacy.load("en_core_web_trf")

gpu = spacy.prefer_gpu()
print(gpu)

def query_hasura_graphql(endpoint, admin_key, query, variables):
    headers = {
        'Content-Type': 'application/json',
        'x-hasura-admin-secret': f'{admin_key}'
    }

    data = {
        'query': query,
        'variables': variables
    }
    response = requests.post(endpoint, json=data, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Request failed with status code {response.status_code}")
        return None

def is_valid_timezone_format(published):
    try:
        # Attempt to parse the string
        date_format = "%a, %d %b %Y %H:%M:%S %z"
        date_object = datetime.strptime(published, date_format)

        hasura_timestamp = date_object.astimezone(timezone.utc).isoformat()
        return True, hasura_timestamp
    except ValueError:
        # If parsing fails, the string is not in the correct format
        return False, None

def check_date_format(date_string):
    try:
        datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%S%z')
        return True
    except ValueError:
        return False

def mutation_hasura_graphql(endpoint, admin_key, mutation_query, mutation_variables):
    headers = {
        'Content-Type': 'application/json',
        'x-hasura-admin-secret': f'{admin_key}'
    }
    response = requests.post(endpoint, json={'query': mutation_query, 'variables': mutation_variables}, headers=headers)
    if response.ok:
        data = response.json()
        print(data)
        return True, data
    else:
        print(f"Mutation failed with status code {response.status_code}: {response.text}")
        return False, None

def update_articles():
    graphql_query = '''
    query MyQuery($link_type: Int!) {
        synopse_articles_t_v1_rss1_feed_links(where: {rss1_link_type: {_eq: $link_type}}) {
          rss1_link
          rss1_link_name
          outlet
        }
      }
    '''
    # Define the variables dictionary
    variables = {
        "link_type": 11
    }
    rss1_links_array = []
    rss1_link_name = []
    outlet = []
    response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
    if response_data:
        rss1_links_array = [item["rss1_link"] for item in response_data["data"]["synopse_articles_t_v1_rss1_feed_links"]]
        outlet = [item["outlet"] for item in response_data["data"]["synopse_articles_t_v1_rss1_feed_links"]]
        rss1_link_name = [item["rss1_link_name"] for item in response_data["data"]["synopse_articles_t_v1_rss1_feed_links"]]
    mutation_query = """
    mutation MyMutation($objects: [synopse_articles_t_v1_rss1_articles_insert_input!] = {}) {
        insert_synopse_articles_t_v1_rss1_articles(objects: $objects, on_conflict: {constraint: t_v1_rss1_articals_post_link_key}) {
            affected_rows
        }
    }
    """
    for i in range(0,len(rss1_links_array)):
        NewsFeed = feedparser.parse(rss1_links_array[i])
        print("############################################################")
        print(rss1_links_array[i])
        articles = []
        for entry in NewsFeed.entries:
            if outlet[i] +"." in entry.link:
              is_default_image = 0
              title = entry.title
              summary = ''
              if 'summary' in entry:
                  summary_nofil = entry.summary
                  summary = re.sub('<[^<]+?>', '', summary_nofil)
              image_url = ""
              if 'media_content' in entry:
                  image_url = entry['media_content'][0]['url']
                  is_default_image = 1
              if 'links' in entry:
                  for link in entry.links:
                      if link.type == "image/jpeg":
                          image_url= link.href
                          is_default_image = 1
                          break
              post_link = entry.link
              published = datetime.now(timezone.utc).isoformat()
              if 'published' in entry:
                  published = entry.published
              datevalidation = is_valid_timezone_format(published)
              if datevalidation[0]:
                  hasura_timestamp = datevalidation[1]
              if check_date_format(published):
                  hasura_timestamp = published
              else:
                  hasura_timestamp = datetime.now().astimezone(timezone.utc).isoformat()
              if "author" in entry:
                  author = entry.author
              else:
                  author = "na"
              tags = []
              tags.append(rss1_link_name[i])
              tags.append(outlet[i])
              if 'tags' in entry:
                  for tag in entry.tags:
                    tags.append(tag.term)
              if outlet[i] in post_link:
                  articles.append({
                          "rss1_link": rss1_links_array[i],
                          "post_link": post_link,
                          "title": title,
                          "summary": summary,
                          "author": author,
                          "image_link" : image_url,
                          "post_published": hasura_timestamp,
                          "is_default_image": is_default_image,
                          "tags": tags,
                      }
                  )
            #print(feed_link, post_link, title, summary, author, image_url, hasura_timestamp, is_default_image)
        mutation_variables = {
            "objects": articles
        }
        #print({'query': mutation_query, 'variables': mutation_variables})
        out1 = mutation_hasura_graphql(endpoint = endpoint, admin_key = admin_key, mutation_query = mutation_query, mutation_variables = mutation_variables)

def summerizer(offset1):
  # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
  model = AutoModelForCausalLM.from_pretrained(
      "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
      model_file="mistral-7b-instruct-v0.1.Q8_0.gguf",
      model_type="mistral",
      gpu_layers=110,
      hf=True,
      context_length=4000
  )
  # Tokenizer
  tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

  # Pipeline
  generator = pipeline(
      model=model,
      tokenizer=tokenizer,
      task='text-generation',
      max_new_tokens=2000,
      min_new_tokens=200,
      repetition_penalty=1.1
  )
  graphql_query = '''
  query MyQuery($offset: Int = 0, $limit: Int = 10) {
    synopse_articles_t_v1_rss1_articles(offset: $offset, where: {is_in_detail: {_eq: 1}, is_summerized: {_eq: 0}}, order_by: {created_at: desc}, limit: $limit) {
      title
      summary
      t_v1_rss1_articles_detail {
        description
      }
      id
    }
  }
  '''
  offset = offset1
  mutation_query = """
  mutation MyMutation($objects: [synopse_articles_t_v2_articles_summary_insert_input!] = {}, $updates: [synopse_articles_t_v1_rss1_articles_updates!] = {where: {}}) {
    insert_synopse_articles_t_v2_articles_summary(objects: $objects, on_conflict: {constraint: t_v2_articles_summary_article_id_key}) {
      affected_rows
    }
    update_synopse_articles_t_v1_rss1_articles_many(updates: $updates) {
      affected_rows
    }
  }
  """
  while True:
    variables = {
    "limit": 2,
    "offset": offset
    }
    synopse_articles_t_v2_articles_summary_insert_input_loc = []
    update_synopse_articles_t_v1_rss1_articles_many_loc = []
    response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
    if len(response_data['data']['synopse_articles_t_v1_rss1_articles']) == 0:
        break
    for response in response_data['data']['synopse_articles_t_v1_rss1_articles']:
      title = response['title']
      summary = response['title']
      article_id = response['id']
      description = ', '.join(response['t_v1_rss1_articles_detail']['description'])
      keyword_prompt = """
      [INST]
      I have the following article that I'd like you to summarize:

      Title: [TITLE]
      Summary: [SUMMARY]
      Description: [DESCRIPTION]

      Please provide a concise summary of the article based on the information provided in the title, summary, and description. The summary should be approximately 300 words in length.
      Make sure you to only return the summary and say nothing else. For example, don't say:
      "Here are the summary of the article"
      [/INST]
      """
      # Replace placeholders with actual data
      filled_prompt = keyword_prompt.replace("[TITLE]", title).replace("[SUMMARY]", summary).replace("[DESCRIPTION]", description)
      tokens = tokenizer.encode(filled_prompt)
      token_count = len(tokens)
      if token_count > 2000:
        description_tokens = tokenizer.encode(description)
        description_token_count = len(description_tokens)
        req_tokens = description_token_count - (token_count - 2000)
        tokens = tokenizer.encode(description)[:req_tokens]

        # Decode the tokens back into a string
        description = tokenizer.decode(tokens)
        filled_prompt = keyword_prompt.replace("[TITLE]", title).replace("[SUMMARY]", summary).replace("[DESCRIPTION]", description)
      response1 = generator(filled_prompt)# Get the generated text
      generated_text = response1[0]["generated_text"]
      start_index = generated_text.find("[/INST]") + len("[/INST]")
      summary = generated_text[start_index:].strip()
      print(summary)
      synopse_articles_t_v2_articles_summary_insert_input_loc.append({
        "article_id": article_id,
        "summary": summary,
        }
        )
      update_synopse_articles_t_v1_rss1_articles_many_loc.append({
        "where": {"id" : { "_eq": article_id }},
        "_set": {"is_summerized": 1}
        })
    mutation_variables = {
        "objects": synopse_articles_t_v2_articles_summary_insert_input_loc,
        "updates": update_synopse_articles_t_v1_rss1_articles_many_loc,
        }
    out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)
    print(out1)

def ner_tagging(offset1):
  query = """
  query MyQuery($limit: Int!, $offset: Int!) {
    synopse_articles_t_v1_rss1_articles(where: {is_summerized: {_eq: 1}, is_ner_tagged: {_eq: 0}}, limit: $limit, offset: $offset, order_by: {created_at: desc}) {
      title
      summary
      id
      t_v2_articles_summary {
        summary
        person_tags
        location_tags
        keywords_tags
        org_tags
      }
      t_v1_rss1_articles_detail {
        description
      }
    }
  }
  """
  offset = offset1
  mutataion_query = """
  mutation MyMutation($updates: [synopse_articles_t_v2_articles_summary_updates!] = {where: {}}, $updates1: [synopse_articles_t_v1_rss1_articles_updates!] = {where: {}}) {
    update_synopse_articles_t_v2_articles_summary_many(updates: $updates) {
      affected_rows
    }
    update_synopse_articles_t_v1_rss1_articles_many(updates: $updates1) {
      affected_rows
    }
  }
  """
  while True:
      variables = {
          "limit": 2,
          "offset": offset
      }
      synopse_articles_t_v2_articles_summary_updates_loc = []
      synopse_articles_t_v1_rss1_articles_updates_loc = []
      response_data = query_hasura_graphql(endpoint=endpoint, admin_key=admin_key, query=query, variables=variables)
      if len(response_data['data']['synopse_articles_t_v1_rss1_articles']) == 0:
          break
      for response in response_data['data']['synopse_articles_t_v1_rss1_articles']:
          title = response['title']
          summary = response['summary']
          summary2 = response['t_v2_articles_summary']['summary']
          description = ", ".join(response['t_v1_rss1_articles_detail']['description'])
          text = title + " " + summary + " " + summary2 + " " + description
          doc = nlp(text)
          kw_model = KeyBERT()
          num_words = len(text.split())
          top_keywords = int(num_words/50)
          if top_keywords < 15:
              top_keywords = 15
          elif top_keywords > 30:
              top_keywords = 30
          if response['t_v2_articles_summary']['person_tags'] is None:
              person = []
          else:
              person = response['t_v2_articles_summary']['person_tags']
          if response['t_v2_articles_summary']['location_tags'] is None:
              loc = []
          else:
              loc = response['t_v2_articles_summary']['location_tags']
          if response['t_v2_articles_summary']['keywords_tags'] is None:
              keys = []
          else:
              keys = response['t_v2_articles_summary']['keywords_tags']
          if response['t_v2_articles_summary']['org_tags'] is None:
              orgs = []
          else:
              orgs = response['t_v2_articles_summary']['org_tags']
          keywords = kw_model.extract_keywords(text, top_n=int(top_keywords))
          keys_bert = [keyword[0] for keyword in keywords]
          for key in keys_bert:
              keys.append(key)
          for ent in doc.ents:
              if ent.label_ == "GPE" or ent.label_ == "LOC" or ent.label_ == "NORP" or ent.label_ == "FAC":
                  loc.append(ent.text)
              elif ent.label_ == "PERSON":
                  person.append(ent.text)
              elif ent.label_ == "ORG ":
                  orgs.append(ent.text)
              elif ent.label_ == "EVENT" or ent.label_ == "WORK_OF_ART" or ent.label_ == "PRODUCT" or ent.label_ == "LAW":
                  keys.append(ent.text)
          loc = [item.lower() for item in loc]
          person = [item.lower() for item in person]
          key = [item.lower() for item in keys]
          org = [item.lower() for item in orgs]
          loc = list(set(loc))
          person = list(set(person))
          key = list(set(key))
          org = list(set(org))
          synopse_articles_t_v2_articles_summary_updates_loc.append({
                  "where": {"article_id" : { "_eq": response['id'] }},
                  "_set": {"location_tags": loc, "person_tags": person, "keywords_tags": key , "org_tags": org}
                  })
          synopse_articles_t_v1_rss1_articles_updates_loc.append({
                  "where": {"id" : { "_eq": response['id'] }},
                  "_set": {"is_ner_tagged": 1}
                  })
      mutation_variables = {
          "updates": synopse_articles_t_v2_articles_summary_updates_loc,
          "updates1": synopse_articles_t_v1_rss1_articles_updates_loc
      }
      out = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutataion_query, mutation_variables=mutation_variables)


def ai_tagging(offset1):
  device = 0 if torch.cuda.is_available() else -1  # Use GPU if available, else CPU
  print(device)
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=device)
  graphql_tags_query = '''
  query MyQuery {
    synopse_articles_t_v4_tags_hierarchy(where: {is_valid: {_eq: 1}}) {
      id
      tag
      tag_hierachy
    }
  }
  '''
  tags_main = []
  tags_ids = []
  variables_tags = {
  }
  response_data_tags = query_hasura_graphql(endpoint, admin_key, graphql_tags_query, variables_tags)
  tags_main = []
  tags_all = []
  for response in response_data_tags['data']['synopse_articles_t_v4_tags_hierarchy']:
    if response['tag_hierachy'] == 0:
      tags_main.append(response['tag'])
      tags_ids.append(response['id'])
    else:
      tags_all.append(response['tag'])
      #print(response['tag'], response['id'])
  tags_all = list(set(tags_all))
  tags_l1 = ", ".join(tags_main)
  query = """
  query MyQuery($limit: Int!, $offset: Int!) {
    synopse_articles_t_v1_rss1_articles(where: {is_summerized: {_eq: 1}, is_ner_tagged: {_eq: 1}, is_ai_tagged: {_eq: 0}}, limit: $limit, offset: $offset, order_by: {created_at: desc}) {
      title
      summary
      id
      t_v2_articles_summary {
        summary
        person_tags
        location_tags
        keywords_tags
        org_tags
      }
    }
  }
  """
  offset = offset1
  mutataion_query = """
  mutation MyMutation($updates: [synopse_articles_t_v2_articles_summary_updates!] = {where: {}}, $updates1: [synopse_articles_t_v1_rss1_articles_updates!] = {where: {}}) {
    update_synopse_articles_t_v2_articles_summary_many(updates: $updates) {
      affected_rows
    }
    update_synopse_articles_t_v1_rss1_articles_many(updates: $updates1) {
      affected_rows
    }
  }
    """
  while True:
      variables = {
          "limit": 2,
          "offset": offset
      }
      synopse_articles_t_v2_articles_summary_updates_loc = []
      synopse_articles_t_v1_rss1_articles_updates_loc = []
      response_data = query_hasura_graphql(endpoint=endpoint, admin_key=admin_key, query=query, variables=variables)
      if len(response_data['data']['synopse_articles_t_v1_rss1_articles']) == 0:
          break
      for response in response_data['data']['synopse_articles_t_v1_rss1_articles']:
          title = response['title']
          summary = response['summary']
          summary2 = response['t_v2_articles_summary']['summary']
          if response['t_v2_articles_summary']['person_tags'] is None:
              person_tags = " "
          else:
              person_tags = ", ".join(response['t_v2_articles_summary']['person_tags'])
          if response['t_v2_articles_summary']['location_tags'] is None:
              location_tags = " "
          else:
              location_tags = ", ".join(response['t_v2_articles_summary']['location_tags'])
          if response['t_v2_articles_summary']['keywords_tags'] is None:
              keywords_tags = " "
          else:
              keywords_tags =  ", ".join(response['t_v2_articles_summary']['keywords_tags'])
          if response['t_v2_articles_summary']['org_tags'] is None:
              org_tags = " "
          else:
              org_tags =  ", ".join(response['t_v2_articles_summary']['org_tags'])
          text = title + " " + summary + " " + summary2 + " " + person_tags + " " + location_tags + " " + keywords_tags + " " + org_tags
          sequence_to_classify = text
          candidate_labels = tags_l1
          cls = classifier(sequence_to_classify, candidate_labels, multi_label=True)
          article_tags = []
          article_tags.append(cls['labels'][0])
          i1 = tags_main.index(article_tags[0])
          t1_index = tags_ids[i1]
          article_tags.append(cls['labels'][1])
          i2 = tags_main.index(article_tags[1])
          t2_index = tags_ids[i2]
          tags_sub=[]
          for responset in response_data_tags['data']['synopse_articles_t_v4_tags_hierarchy']:
            if responset['tag_hierachy'] == t1_index:
              tags_sub.append(responset['tag'])
          tags_sub = ", ".join(tags_sub)
          cls = classifier(sequence_to_classify, tags_sub, multi_label=True)
          article_tags.append(cls['labels'][0])
          article_tags.append(cls['labels'][1])
          tags_sub=[]
          for responset in response_data_tags['data']['synopse_articles_t_v4_tags_hierarchy']:
            if responset['tag_hierachy'] == t2_index:
              tags_sub.append(responset['tag'])
          tags_sub = ", ".join(tags_sub)
          cls = classifier(sequence_to_classify, tags_sub, multi_label=True)
          article_tags.append(cls['labels'][0])
          article_tags.append(cls['labels'][1])
          synopse_articles_t_v2_articles_summary_updates_loc.append({
                  "where": {"article_id" : { "_eq": response['id'] }},
                  "_set": {"ai_tags": article_tags}
                  })
          synopse_articles_t_v1_rss1_articles_updates_loc.append({
                  "where": {"id" : { "_eq": response['id'] }},
                  "_set": {"is_ai_tagged": 1}
                  })
      mutation_variables = {
          "updates": synopse_articles_t_v2_articles_summary_updates_loc,
          "updates1": synopse_articles_t_v1_rss1_articles_updates_loc
      }
      out = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutataion_query, mutation_variables=mutation_variables)

def vectorize(offset1):
  model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
  graphql_query = '''
  query MyQuery($limit: Int!, $offset: Int!) {
    synopse_articles_t_v1_rss1_articles(limit: $limit, offset: $offset, where: {is_summerized: {_eq: 1}, is_vectorized: {_eq: 0}, is_ai_tagged: {_eq: 1}}, order_by: {created_at: desc}) {
      id
      title
      summary
      t_v2_articles_summary {
        summary
        keywords_tags
        location_tags
        org_tags
        person_tags
        ai_tags
      }
      tags
    }
  }
  '''
  offset = offset1
  mutation_query = """
  mutation MyMutation($objects: [synopse_articles_t_v2_articles_vectors_insert_input!] = {}, $updates: [synopse_articles_t_v1_rss1_articles_updates!] = {where: {}}) {
    insert_synopse_articles_t_v2_articles_vectors(objects: $objects, on_conflict: {constraint: t_v2_articles_vectors_article_id_key}) {
      affected_rows
    }
    update_synopse_articles_t_v1_rss1_articles_many(updates: $updates) {
      affected_rows
    }
  }
  """
  while True:
    variables = {
    "limit": 20,
    "offset": offset
    }
    synopse_articles_t_v2_articles_vectors_insert_input_loc = []
    synopse_articles_t_v1_rss1_articles_updates_loc = []
    response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
    if len(response_data['data']['synopse_articles_t_v1_rss1_articles']) == 0:
        break
    p1 = []
    article_ids = []
    for response in response_data['data']['synopse_articles_t_v1_rss1_articles']:
      article_ids.append( response['id'] )
      tags  = ', '.join(response['tags'] ) + " " + ', '.join(response['t_v2_articles_summary']['keywords_tags'] ) + " " + ', '.join(response['t_v2_articles_summary']['location_tags'] ) + " " + ', '.join(response['t_v2_articles_summary']['org_tags'] ) + " " + ', '.join(response['t_v2_articles_summary']['person_tags'] ) + " " + ', '.join(response['t_v2_articles_summary']['ai_tags'] ) + " " + ', '.join(response['t_v2_articles_summary']['ai_tags'] )
      p12 = response['title'] + "\n" + response['summary'] + "\n" + response['t_v2_articles_summary']['summary'] + tags
      p1.append(p12)

    embeddings = model.encode(p1)
    for i in range(0,len(article_ids)):
      synopse_articles_t_v2_articles_vectors_insert_input_loc.append({
          "article_id": article_ids[i],
          "a_vector":  str(embeddings[i].tolist()),
          }
          )
      synopse_articles_t_v1_rss1_articles_updates_loc.append({
          "where": {"id" : { "_eq": article_ids[i] }},
          "_set": {"is_vectorized": 1}
          })

    mutation_variables = {
        "objects": synopse_articles_t_v2_articles_vectors_insert_input_loc,
        "updates": synopse_articles_t_v1_rss1_articles_updates_loc,
        }
    out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)

def grouping_l1(offset1):
    graphql_query = '''
    query MyQuery($offset: Int!, $limit: Int!) {
      synopse_articles_t_v1_rss1_articles(offset: $offset, limit: $limit, order_by: {created_at: desc}, where: {is_grouped: {_eq: 0}, is_vectorized: {_eq: 1}}) {
        id
      }
    }
    '''
    offset = offset1
    mutation_query = """
    mutation MyMutation($objects: [synopse_articles_t_v3_article_groups_l1_insert_input!] = {}, $updates: [synopse_articles_t_v1_rss1_articles_updates!] = {where: {}}) {
      insert_synopse_articles_t_v3_article_groups_l1(objects: $objects, on_conflict: {constraint: t_v3_article_groups_l1_article_id_key}) {
        affected_rows
      }
      update_synopse_articles_t_v1_rss1_articles_many(updates: $updates) {
        affected_rows
      }
    }
    """
    func_query = '''
    query MyQuery($p_article_id: bigint!) {
      synopse_articles_f_get_similar_articles_group(args: {p_article_id: $p_article_id}) {
        article_id
      }
    }
    '''
    while True:
        variables = {
        "limit": 20,
        "offset": offset
        }
        synopse_articles_t_v3_article_groups_l1_insert_input_loc=[]
        synopse_articles_t_v1_rss1_articles_updates_loc=[]
        response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
        if len(response_data['data']['synopse_articles_t_v1_rss1_articles']) == 0:
            break
        s1= []
        ids=[]
        for response in response_data['data']['synopse_articles_t_v1_rss1_articles']:
            func_variables = {
                "p_article_id": response['id']
                }
            func_response_data = query_hasura_graphql(endpoint, admin_key, func_query, func_variables)
            article_group = []
            #print(json.dumps(func_response_data, indent=4))
            if len(func_response_data['data']['synopse_articles_f_get_similar_articles_group']) > 0:
                for func_response in func_response_data['data']['synopse_articles_f_get_similar_articles_group']:
                    article_group.append(func_response['article_id'])

            synopse_articles_t_v3_article_groups_l1_insert_input_loc.append({
                "article_id": response['id'],
                "initial_group": article_group,
                "article_count": len(article_group)
                }
                )
            synopse_articles_t_v1_rss1_articles_updates_loc.append({
                "where": {"id" : { "_eq": response['id'] }},
                "_set": {"is_grouped": 1}
                })
        mutation_variables = {
        "objects": synopse_articles_t_v3_article_groups_l1_insert_input_loc,
        "updates": synopse_articles_t_v1_rss1_articles_updates_loc,
        }
        out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)

def grouping_l2(offset1):
    graphql_query = '''
    query MyQuery($limit: Int!, $offset: Int!) {
      synopse_articles_t_v3_article_groups_l1(where: {t_v1_rss1_article: {is_grouped: {_eq: 1}}, article_count: {_gt: 1}}, order_by: {updated_at: asc}, limit: $limit, offset: $offset) {
        article_id
        initial_group
      }
    }
    '''
    query2 = '''
    query MyQuery($articleid: [bigint!] = []) {
        synopse_articles_t_v3_article_groups_l1(where: {initial_group: {_contains: $articleid}}) {
        article_id
        initial_group
        }
    }
    '''
    query3 = '''
    query MyQuery($articleid: [bigint!] = []) {
        synopse_articles_t_v3_article_groups_l2(where: {articles_group: {_contains: $articleid}}, order_by: {is_valid: desc, updated_at: desc}) {
        id
        articles_group
        is_valid
        }
    }
    '''
    mutation_query = """
    mutation MyMutation($objects: [synopse_articles_t_v3_article_groups_l2_insert_input!] = {}, $updates: [synopse_articles_t_v1_rss1_articles_updates!] = {where: {}}, $updates1: [synopse_articles_t_v3_article_groups_l2_updates!] = {where: {}}, $articleGroupIds: [bigint!] = "") {
      insert_synopse_articles_t_v3_article_groups_l2(objects: $objects, on_conflict: {constraint: t_v3_article_groups_l2_pkey}) {
        affected_rows
      }
      update_synopse_articles_t_v1_rss1_articles_many(updates: $updates) {
        affected_rows
      }
      update_synopse_articles_t_v3_article_groups_l2_many(updates: $updates1) {
        affected_rows
      }
      delete_synopse_articles_t_v3_article_groups_l2(where: {id: {_in: $articleGroupIds}}) {
        affected_rows
      }
    }
    """
    while True:
        variables = {
        "limit": 1,
        "offset": 0
        }
        response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
        synopse_articles_t_v3_article_groups_l2_insert_input_loc=[]
        synopse_articles_t_v1_rss1_articles_updates_loc=[]
        synopse_articles_t_v3_article_groups_l2_updates_loc=[]
        articleGroupIds_loc=[]
        if len(response_data['data']['synopse_articles_t_v3_article_groups_l1']) == 0:
            break
        for response in response_data['data']['synopse_articles_t_v3_article_groups_l1']:
            variables2 = {
                "articleid": [response['article_id']]
                }
            response_data1 = query_hasura_graphql(endpoint, admin_key, query2, variables2)
            articles_ids = []
            if len(response_data1['data']['synopse_articles_t_v3_article_groups_l1']) > 0:
                for func_response in response_data1['data']['synopse_articles_t_v3_article_groups_l1']:
                    articles_ids.append(func_response['initial_group'])
            n1 = []
            for sublist in articles_ids:
                for element in sublist:
                    n1.append(element)
            articles_ids = list(set(n1))
            articles_ids.sort(reverse=True)
            while True:
                if len(articles_ids) > 1:
                    articles_news = []
                    for article_id in articles_ids:
                        variables3 = {
                            "articleid": [article_id]
                            }
                        response_data2 = query_hasura_graphql(endpoint, admin_key, query2, variables3)
                        if len(response_data1['data']['synopse_articles_t_v3_article_groups_l1']) > 0:
                            for func_response in response_data1['data']['synopse_articles_t_v3_article_groups_l1']:
                                articles_news.append(func_response['initial_group'])
                    n2 = []
                    for sublist in articles_news:
                        for element in sublist:
                            n2.append(element)
                    articles_news = list(set(n2))
                    articles_news.sort(reverse=True)
                    if articles_ids == articles_news:
                        break
                    else:
                        articles_ids = articles_news
            n5 = []
            for a1 in articles_ids:
                variables3 = {
                    "articleid": [a1]
                    }
                response_data2 = query_hasura_graphql(endpoint, admin_key, query3, variables3)
                if len(response_data2['data']['synopse_articles_t_v3_article_groups_l2']) > 0:
                    for func_response in response_data2['data']['synopse_articles_t_v3_article_groups_l2']:
                        n5.append(func_response['articles_group'])
                        if func_response['is_valid'] == 0:
                            articleGroupIds_loc.append(func_response['id'])
            n5.append(articles_ids)
            n2 = []
            for sublist in n5:
                for element in sublist:
                    n2.append(element)
            articles_ids = list(set(n2))
            articles_ids.sort(reverse=True)
            synopse_articles_t_v3_article_groups_l2_insert_input_loc.append({
                "articles_group": articles_ids,
                'articles_in_group': len(articles_ids)
                }
                )
            for article in articles_ids:
                synopse_articles_t_v1_rss1_articles_updates_loc.append({
                    "where": {"id" : { "_eq": article }},
                    "_set": {"is_grouped": 2}
                    })
        mutation_variables = {
            "objects": synopse_articles_t_v3_article_groups_l2_insert_input_loc,
            "updates": synopse_articles_t_v1_rss1_articles_updates_loc,
            "updates1": synopse_articles_t_v3_article_groups_l2_updates_loc,
            "articleGroupIds": articleGroupIds_loc
            }
        out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)


def gen_article(offset1):
    # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
    model = AutoModelForCausalLM.from_pretrained(
        "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.1.Q8_0.gguf",
        model_type="mistral",
        gpu_layers=110,
        hf=True,
        context_length=8000
    )
    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

    # Pipeline
    generator = pipeline(
        model=model,
        tokenizer=tokenizer,
        task='text-generation',
        max_new_tokens=6000,
        min_new_tokens=400,
        repetition_penalty=1.1
    )
    graphql_query = '''
    query MyQuery($limit: Int!, $offset: Int!) {
      synopse_articles_t_v3_article_groups_l2(where: {is_valid: {_eq: 0}, is_summerized: {_eq: 0}, articles_in_group: {_gte: 2}}, offset: $offset, limit: $limit, order_by: {created_at: desc}) {
        articles_group
        id
      }
    }

    '''
    graphql_query_article = '''
    query MyQuery($articles: [bigint!] = []) {
      synopse_articles_t_v1_rss1_articles(where: {id: {_in: $articles}}, order_by: {created_at: desc}) {
        title
        summary
        t_v2_articles_summary {
          summary
        }
      }
    }
    '''
    offset = offset1
    mutation_query = """
    mutation MyMutation($objects: [synopse_articles_t_v4_article_groups_l2_detail_insert_input!] = {}, $updates: [synopse_articles_t_v3_article_groups_l2_updates!] = {where: {}}) {
      insert_synopse_articles_t_v4_article_groups_l2_detail(objects: $objects, on_conflict: {constraint: t_v4_article_groups_l2_detail_article_group_id_key}) {
        affected_rows
      }
      update_synopse_articles_t_v3_article_groups_l2_many(updates: $updates) {
        affected_rows
      }
    }
    """
    while True:
        variables = {
        "limit": 1,
        "offset": offset
        }
        response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
        if len(response_data['data']['synopse_articles_t_v3_article_groups_l2']) == 0:
            break
        for response in response_data['data']['synopse_articles_t_v3_article_groups_l2']:
            llm_text = ''
            synopse_articles_t_v4_article_groups_l2_detail_insert_input_loc=[]
            synopse_articles_t_v3_article_groups_l2_updates_loc=[]
            variables1 = {
            "articles": response['articles_group']
            }
            response_data1 = query_hasura_graphql(endpoint, admin_key, graphql_query_article, variables1)
            titles = ""
            summarys = ""
            summarys_detailed = ""
            for response1 in response_data1['data']['synopse_articles_t_v1_rss1_articles']:
                titles = titles + response1['title']
                summarys =summarys + response1['summary']
                summarys_detailed = summarys_detailed + response1['t_v2_articles_summary']['summary']
            keyword_prompt = """
            [INST]
            I have the following article that I'd like you to summarize:

            Titles: [TITLE]
            Summarys: [SUMMARY]
            Summarys_detailed: [SUMMARYDETAILED]


            Please provide a concise UNBIASED news article based on the information provided in the title, Summarys, and Summarys_detailed.
            The summary should be approximately 500 words in length.
            Make sure you to only return the article wit atleast 3 paragraphsand say nothing else. For example, don't say:
            "Here are the article title or article"
            [/INST]
            """
            # Replace placeholders with actual data
            filled_prompt = keyword_prompt.replace("[TITLE]", titles).replace("[SUMMARY]", summarys).replace("[SUMMARYDETAILED]", summarys_detailed)
            tokens = tokenizer.encode(filled_prompt)
            token_count = len(tokens)
            if token_count > 5000:
              filled_prompt = keyword_prompt.replace("[TITLE]", titles).replace("[SUMMARY]", "").replace("[SUMMARYDETAILED]", summarys_detailed)
            tokens = tokenizer.encode(filled_prompt)
            token_count = len(tokens)
            if token_count > 5000:
              summarys_detailed_tokens = tokenizer.encode(summarys_detailed)
              summarys_detailed_token_count = len(summarys_detailed_tokens)
              req_tokens = summarys_detailed_token_count - (token_count - 5000)
              tokens = tokenizer.encode(summarys_detailed)[:req_tokens]

              # Decode the tokens back into a string
              summarys_detailed = tokenizer.decode(tokens)
              filled_prompt = keyword_prompt.replace("[TITLE]", titles).replace("[SUMMARY]", summarys).replace("[SUMMARYDETAILED]", summarys_detailed)
            #print(filled_prompt)

            response1 = generator(filled_prompt)# Get the generated text
            generated_text = response1[0]["generated_text"]
            start_index = generated_text.find("[/INST]") + len("[/INST]")
            article_final = generated_text[start_index:].strip()
            print(article_final)
            synopse_articles_t_v4_article_groups_l2_detail_insert_input_loc.append({
              "article_group_id": response['id'],
              "summary": article_final,
              }
              )
            synopse_articles_t_v3_article_groups_l2_updates_loc.append({
              "where": {"id" : { "_eq": response['id'] }},
              "_set": {"is_summerized": 1 , "is_valid": 1}
              })
        mutation_variables = {
        "objects": synopse_articles_t_v4_article_groups_l2_detail_insert_input_loc,
        "updates": synopse_articles_t_v3_article_groups_l2_updates_loc,
        }
        out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)

def gen_title(offset1):
    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
    model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
    graphql_query = '''
    query MyQuery($limit: Int!, $offset: Int!) {
      synopse_articles_t_v4_article_groups_l2_detail(where: {title: {_is_null: true}}, limit: $limit, offset: $offset, order_by: {created_at: desc}) {
        summary
        article_group_id
      }
    }
    '''
    offset = offset1
    mutation_query = """
    mutation MyMutation($updates: [synopse_articles_t_v4_article_groups_l2_detail_updates!] = {where: {}}) {
      update_synopse_articles_t_v4_article_groups_l2_detail_many(updates: $updates) {
        affected_rows
      }
    }
    """
    while True:
        variables = {
        "limit": 1,
        "offset": offset
        }
        response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
        if len(response_data['data']['synopse_articles_t_v4_article_groups_l2_detail']) == 0:
            break
        for response in response_data['data']['synopse_articles_t_v4_article_groups_l2_detail']:
            llm_text = ''
            synopse_articles_t_v4_article_groups_l2_detail_updates_loc=[]
            input_text = "generate a intresting and viral news title for:  " + "\n" + response['summary']
            max_length = 512
            input_text = input_text[:max_length]
            input_ids = tokenizer.encode(input_text, return_tensors="pt")
            outputs = model.generate(input_ids, max_new_tokens=50)
            generated_text = tokenizer.decode(outputs[0])
            clean_text = re.sub('<.*?>', '', generated_text) # remove data between < and >
            print(clean_text)
            synopse_articles_t_v4_article_groups_l2_detail_updates_loc.append({
              "where": {"article_group_id" : { "_eq": response['article_group_id'] }},
              "_set": {"title": clean_text }
              })
        mutation_variables = {
        "updates": synopse_articles_t_v4_article_groups_l2_detail_updates_loc,
        }
        out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)


def gen_summary_60_words(offset1):
     # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
    model = AutoModelForCausalLM.from_pretrained(
        "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.1.Q8_0.gguf",
        model_type="mistral",
        gpu_layers=110,
        hf=True,
        context_length=8000
    )
    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

    # Pipeline
    generator = pipeline(
        model=model,
        tokenizer=tokenizer,
        task='text-generation',
        max_new_tokens=6000,
        min_new_tokens=80,
        repetition_penalty=1.1
    )
    graphql_query = '''
    query MyQuery($limit: Int!, $offset: Int!) {
      synopse_articles_t_v4_article_groups_l2_detail(where: {summary_60_words: {_is_null: true}, title: {_is_null: false}, summary: {_is_null: false}}, limit: $limit, offset: $offset) {
        article_group_id
        title
        summary
      }
    }
    '''
    offset = offset1
    mutation_query = """
    mutation MyMutation($updates: [synopse_articles_t_v4_article_groups_l2_detail_updates!] = {where: {}}) {
      update_synopse_articles_t_v4_article_groups_l2_detail_many(updates: $updates) {
        affected_rows
      }
    }
    """
    while True:
        variables = {
        "limit": 1,
        "offset": offset
        }
        response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
        if len(response_data['data']['synopse_articles_t_v4_article_groups_l2_detail']) == 0:
            break
        for response in response_data['data']['synopse_articles_t_v4_article_groups_l2_detail']:

            synopse_articles_t_v4_article_groups_l2_detail_updates_loc=[]
            keyword_prompt = """
            [INST]
            I have the following article that I'd like you to summarize:

            Title: [TITLE]
            detail: [SUMMARY]


            Please provide a concise UNBIASED news summary article based on the information provided in the Title, and detail.
            The summary should be approximately 70 words in length.
            Make sure you to only return the article say nothing else. For example, don't say:
            "Here are the article title or article"
            [/INST]
            """
            # Replace placeholders with actual data
            filled_prompt = keyword_prompt.replace("[TITLE]", response['title']).replace("[SUMMARY]",  response['summary'])
            response1 = generator(filled_prompt)# Get the generated text
            generated_text = response1[0]["generated_text"]
            start_index = generated_text.find("[/INST]") + len("[/INST]")
            article_final = generated_text[start_index:].strip()
            print(article_final)
            synopse_articles_t_v4_article_groups_l2_detail_updates_loc.append({
              "where": {"article_group_id" : { "_eq": response['article_group_id'] }},
              "_set": {"summary_60_words": article_final }
              })
        mutation_variables = {
        "updates": synopse_articles_t_v4_article_groups_l2_detail_updates_loc,
        }
        out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)

def array2dto2d(n2):
    n1 = []
    for sublist in n2:
        for element in sublist:
            n1.append(element)
    n2 = list(set(n1))
    return n2

def detail_tags(offset1):
  graphql_query = '''
  query MyQuery($limit: Int!, $offset: Int!) {
  synopse_articles_t_v4_article_groups_l2_detail(where: {ai_tags: {_is_null: true}, summary_60_words: {_is_null: false}}, limit: $limit, offset: $offset) {
      id
      t_v3_article_groups_l2 {
        articles_group
      }
    }
  }
  '''
  query2 = '''
  query MyQuery($article_id: bigint = "") {
    synopse_articles_t_v1_rss1_articles(where: {id: {_eq: $article_id}}) {
      image_link
      tags
      t_v1_rss1_feed_link {
        t_v1_outlet {
          logo_url
        }
      }
      t_v2_articles_summary {
        keywords_tags
        location_tags
        org_tags
        person_tags
        ai_tags
      }
    }
  }
  '''
  offset = offset1
  mutation_query = """
  mutation MyMutation($updates: [synopse_articles_t_v4_article_groups_l2_detail_updates!] = {where: {}}) {
    update_synopse_articles_t_v4_article_groups_l2_detail_many(updates: $updates) {
      affected_rows
    }
  }
  """
  while True:
      variables = {
      "limit": 1,
      "offset": offset
      }
      response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
      synopse_articles_t_v4_article_groups_l2_detail_updates_loc=[]
      image_urls=[]
      logo_urls=[]
      keywords_tags=[]
      location_tags=[]
      org_tags=[]
      person_tags=[]
      ai_tags=[]
      if len(response_data['data']['synopse_articles_t_v4_article_groups_l2_detail']) == 0:
            break
      for response in response_data['data']['synopse_articles_t_v4_article_groups_l2_detail']:
          for article in response['t_v3_article_groups_l2']['articles_group']:
              variables2 = {
              "article_id": article
              }
              response_data1 = query_hasura_graphql(endpoint, admin_key, query2, variables2)
              for response1 in response_data1['data']['synopse_articles_t_v1_rss1_articles']:
                  image_urls.append(response1['image_link'])
                  logo_urls.append(response1['t_v1_rss1_feed_link']['t_v1_outlet']['logo_url'])
                  keywords_tags.append(response1['tags'])
                  keywords_tags.append(response1['t_v2_articles_summary']['keywords_tags'])
                  location_tags.append(response1['t_v2_articles_summary']['location_tags'])
                  org_tags.append(response1['t_v2_articles_summary']['org_tags'])
                  person_tags.append(response1['t_v2_articles_summary']['person_tags'])
                  ai_tags.append(response1['t_v2_articles_summary']['ai_tags'])
      image_urls = list(set(image_urls))
      image_urls = [url for url in image_urls if url != ""]
      logo_urls = list(set(logo_urls))
      logo_urls = [url for url in logo_urls if url != ""]
      keywords_tags = array2dto2d(keywords_tags)
      location_tags = array2dto2d(location_tags)
      org_tags = array2dto2d(org_tags)
      person_tags = array2dto2d(person_tags)
      ai_tags = array2dto2d(ai_tags)
      synopse_articles_t_v4_article_groups_l2_detail_updates_loc.append({
          "where": {"id" : { "_eq": response['id'] }},
          "_set": {"image_urls": image_urls,
                  "logo_urls": logo_urls,
                  "keywords_tags": keywords_tags,
                  "location_tags": location_tags,
                  "org_tags": org_tags,
                  "person_tags": person_tags,
                  "ai_tags": ai_tags
                   }
          })
      mutation_variables = {
      "updates": synopse_articles_t_v4_article_groups_l2_detail_updates_loc,
      }
      out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)

def vectorize_groups(offset1):
  model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
  graphql_query = '''
  query MyQuery($limit: Int!, $offset: Int!) {
    synopse_articles_t_v3_article_groups_l2(where: {is_vectorized: {_eq: 0}, is_summerized: {_eq: 1}, t_v4_article_groups_l2_detail: {ai_tags: {_is_null: false}}}, limit: $limit, offset: $offset) {
      t_v4_article_groups_l2_detail {
        article_group_id
        keywords_tags
        location_tags
        org_tags
        person_tags
        ai_tags
        summary
        summary_60_words
        title
      }
    }
  }
  '''
  offset = 0
  mutation_query = """
  mutation MyMutation($objects: [synopse_articles_t_v5_article_groups_vectors_insert_input!] = {}, $updates: [synopse_articles_t_v3_article_groups_l2_updates!] = {where: {}}) {
    insert_synopse_articles_t_v5_article_groups_vectors(objects: $objects, on_conflict: {constraint: t_v5_article_groups_vectors_article_group_id_key}) {
      affected_rows
    }
    update_synopse_articles_t_v3_article_groups_l2_many(updates: $updates) {
      affected_rows
    }
  }
  """
  while True:
    variables = {
    "limit": 20,
    "offset": offset
    }
    synopse_articles_t_v5_article_groups_vectors_insert_input_loc = []
    synopse_articles_t_v3_article_groups_l2_updates_loc = []
    response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
    if len(response_data['data']['synopse_articles_t_v3_article_groups_l2']) == 0:
        break
    p1 = []
    article_group_ids = []
    for response in response_data['data']['synopse_articles_t_v3_article_groups_l2']:
      article_group_ids.append( response['t_v4_article_groups_l2_detail']['article_group_id'] )
      tags  = ', '.join(response['t_v4_article_groups_l2_detail']['keywords_tags'] ) + " " + ', '.join(response['t_v4_article_groups_l2_detail']['location_tags'] ) + " " + ', '.join(response['t_v4_article_groups_l2_detail']['org_tags'] ) + " " + ', '.join(response['t_v4_article_groups_l2_detail']['person_tags'] ) + " " + ', '.join(response['t_v4_article_groups_l2_detail']['ai_tags'] ) + " " + ', '.join(response['t_v4_article_groups_l2_detail']['ai_tags'] )
      p12 = response['t_v4_article_groups_l2_detail']['title'] + "\n" + response['t_v4_article_groups_l2_detail']['summary'] + "\n" + response['t_v4_article_groups_l2_detail']['summary_60_words'] + tags
      p1.append(p12)
    embeddings = model.encode(p1)
    for i in range(0,len(article_group_ids)):
      synopse_articles_t_v5_article_groups_vectors_insert_input_loc.append({
          "article_group_id": article_group_ids[i],
          "vector1":  str(embeddings[i].tolist()),
          }
          )
      synopse_articles_t_v3_article_groups_l2_updates_loc.append({
          "where": {"id" : { "_eq": article_group_ids[i] }},
          "_set": {"is_vectorized": 1}
          })

    mutation_variables = {
        "objects": synopse_articles_t_v5_article_groups_vectors_insert_input_loc,
        "updates": synopse_articles_t_v3_article_groups_l2_updates_loc,
        }
    out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)

  def trending_search_tags(offset1):
    model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
    graphql_query = '''
    query MyQuery {
      synopse_articles_t_v4_tags_hierarchy(where: {tags1: {_is_null: true}}) {
        tag
        tag_description
        id
      }
    }
    '''
    offset = 0
    mutation_query = """
    mutation MyMutation($updates: [synopse_articles_t_v4_tags_hierarchy_updates!] = {where: {}}) {
      update_synopse_articles_t_v4_tags_hierarchy_many(updates: $updates) {
        affected_rows
      }
    }
    """
    variables = {
    }
    synopse_articles_t_v4_tags_hierarchy_updates_loc = []
    response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
    p1 = []
    tag_ids = []
    for response in response_data['data']['synopse_articles_t_v4_tags_hierarchy']:
      tag_ids.append( response['id'] )
      p12 = "Represent this query for searching documents: show articles most relevent to the tag "  + response['tag'] + "\n" + "description of the tag for more precise search results is " + response['tag_description']
      p1.append(p12)

    embeddings = model.encode(p1)
    for i in range(0,len(tag_ids)):
      synopse_articles_t_v4_tags_hierarchy_updates_loc.append({
          "where": {"id" : { "_eq": tag_ids[i] }},
          "_set": {"tags1":  str(embeddings[i].tolist())}
          })

    mutation_variables = {
        "updates": synopse_articles_t_v4_tags_hierarchy_updates_loc,
        }
    out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)
    graphql_query = '''
    query MyQuery {
      synopse_realtime_t_temp_trending_searches(where: {search1: {_is_null: true}}) {
        id
        search_text
      }
    }

    '''
    offset = 0
    mutation_query = """
    mutation MyMutation($updates: [synopse_realtime_t_temp_trending_searches_updates!] = {where: {}}) {
      update_synopse_realtime_t_temp_trending_searches_many(updates: $updates) {
        affected_rows
      }
    }
    """
    variables = {
    }
    synopse_realtime_t_temp_trending_searches_updates_loc = []
    response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
    p1 = []
    search_ids = []
    for response in response_data['data']['synopse_realtime_t_temp_trending_searches']:
      search_ids.append( response['id'] )
      p12 = "Represent this query for searching documents: "  + response['search_text']
      p1.append(p12)

    embeddings = model.encode(p1)
    for i in range(0,len(search_ids)):
      synopse_realtime_t_temp_trending_searches_updates_loc.append({
          "where": {"id" : { "_eq": search_ids[i] }},
          "_set": {"search1":  str(embeddings[i].tolist())}
          })

    mutation_variables = {
        "updates": synopse_realtime_t_temp_trending_searches_updates_loc,
        }
    out1 = mutation_hasura_graphql(endpoint=endpoint, admin_key=admin_key, mutation_query=mutation_query, mutation_variables=mutation_variables)




False


In [9]:
def update_articles():
    graphql_query = '''
    query MyQuery($link_type: Int!) {
        synopse_articles_t_v1_rss1_feed_links(where: {rss1_link_type: {_eq: $link_type}}) {
          rss1_link
          rss1_link_name
          outlet
        }
      }
    '''
    # Define the variables dictionary
    variables = {
        "link_type": 11
    }
    rss1_links_array = []
    rss1_link_name = []
    outlet = []
    response_data = query_hasura_graphql(endpoint, admin_key, graphql_query, variables)
    if response_data:
        rss1_links_array = [item["rss1_link"] for item in response_data["data"]["synopse_articles_t_v1_rss1_feed_links"]]
        outlet = [item["outlet"] for item in response_data["data"]["synopse_articles_t_v1_rss1_feed_links"]]
        rss1_link_name = [item["rss1_link_name"] for item in response_data["data"]["synopse_articles_t_v1_rss1_feed_links"]]
    mutation_query = """
    mutation MyMutation($objects: [synopse_articles_t_v1_rss1_articles_insert_input!] = {}) {
        insert_synopse_articles_t_v1_rss1_articles(objects: $objects, on_conflict: {constraint: t_v1_rss1_articals_post_link_key}) {
            affected_rows
        }
    }
    """
    for i in range(0,len(rss1_links_array)):
        NewsFeed = feedparser.parse(rss1_links_array[i])
        articles = []
        if (rss1_links_array[i] == "http://rss.cnn.com/rss/edition_world.rss"):
            print("############################################################")
            for entry in NewsFeed.entries:
                if outlet[i] +"." in entry.link:
                    is_default_image = 0
                    title = entry.title
                    summary = ''
                    if 'summary' in entry:
                        summary_nofil = entry.summary
                        summary = re.sub('<[^<]+?>', '', summary_nofil)
                    image_url = ""
                    if 'media_content' in entry:
                        image_url = entry['media_content'][0]['url']
                        is_default_image = 1
                    if 'links' in entry:
                        for link in entry.links:
                            if link.type == "image/jpeg":
                                image_url= link.href
                                is_default_image = 1
                                break
                    post_link = entry.link
                    published = datetime.now(timezone.utc).isoformat()
                    if 'published' in entry:
                        published = entry.published
                    datevalidation = is_valid_timezone_format(published)
                    if datevalidation[0]:
                        hasura_timestamp = datevalidation[1]
                    if check_date_format(published):
                        hasura_timestamp = published
                    else:
                        hasura_timestamp = datetime.now().astimezone(timezone.utc).isoformat()
                    if "author" in entry:
                        author = entry.author
                    else:
                        author = "na"
                    tags = []
                    tags.append(rss1_link_name[i])
                    tags.append(outlet[i])
                    if 'tags' in entry:
                        for tag in entry.tags:
                            tags.append(tag.term)
                    if outlet[i] in post_link:
                        articles.append({
                                "rss1_link": rss1_links_array[i],
                                "post_link": post_link,
                                "title": title,
                                "summary": summary,
                                "author": author,
                                "image_link" : image_url,
                                "post_published": hasura_timestamp,
                                "is_default_image": is_default_image,
                                "tags": tags,
                            }
                        )
                    print(post_link, title, summary, author, image_url, hasura_timestamp, is_default_image)
            mutation_variables = {
                "objects": articles
            }
            print({'query': mutation_query, 'variables': mutation_variables})
            out1 = mutation_hasura_graphql(endpoint = endpoint, admin_key = admin_key, mutation_query = mutation_query, mutation_variables = mutation_variables)
            print(out1)
update_articles()

############################################################
https://www.cnn.com/business/live-news/stock-market-bank-earnings/index.html Markets digest bank earnings after recent turmoil  na https://cdn.cnn.com/cnnnext/dam/assets/230328155427-01-nyse-0328-super-169.jpg 2023-12-18T07:41:10.317865+00:00 1
https://www.cnn.com/2023/04/13/success/tax-filing-tips/index.html Still haven't filed your taxes? Here's what you need to know So far this tax season, the IRS has received more than 90 million income tax returns for 2022. na https://cdn.cnn.com/cnnnext/dam/assets/230411161107-stressed-man-stock-super-169.jpeg 2023-12-18T07:41:10.317865+00:00 1
https://www.cnn.com/2023/04/14/economy/march-retail-sales/index.html Retail spending fell in March as consumers pull back Spending at US retailers fell in March as consumers pulled back amid recessionary fears fueled by the banking crisis. na https://cdn.cnn.com/cnnnext/dam/assets/230412211243-grocery-store-california-230412-super-169.jpg 2023-12