<a href="https://colab.research.google.com/github/farfalan/Csharp/blob/main/templates/aistudio_gemini_prompt_freeform.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##### Copyright 2023 Google LLC

In [1]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Setup

In [2]:
!pip install -U -q "google-generativeai>=0.8.2"

In [3]:
# import necessary modules.
import base64
import copy
import json
import pathlib
import requests


import PIL.Image
import IPython.display
from IPython.display import Markdown

try:
    # The SDK will automatically read it from the GOOGLE_API_KEY environment variable.
    # In Colab get the key from Colab-secrets ("🔑" in the left panel).
    import os
    from google.colab import userdata

    os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")
except ImportError:
    pass

import google.generativeai as genai

# Parse the arguments

model = 'gemini-2.0-flash-exp' # @param {isTemplate: true}
contents_b64 = 'W3sicm9sZSI6InVzZXIiLCJwYXJ0cyI6W3sidGV4dCI6ImZyb20gZ29vZ2xlIGFpIHN0dWRpbyBmb2xkZXIgaW4gZHJpdmUgbG9hZCB0aGUgZmlsZVxcbiJ9XX0seyJyb2xlIjoibW9kZWwiLCJwYXJ0cyI6W3sidGV4dCI6Ik9rYXksIGxldCdzIGJyZWFrIGRvd24gaG93IHRvIGxvYWQgYSBmaWxlIGZyb20geW91ciBHb29nbGUgRHJpdmUsIHNwZWNpZmljYWxseSBmcm9tIHRoZSBHb29nbGUgQUkgU3R1ZGlvIGZvbGRlciwgaW50byBhIENvbGFiIG5vdGVib29rIG9yIGEgc2ltaWxhciBlbnZpcm9ubWVudCB3aGVyZSB5b3UgbWlnaHQgYmUgd29ya2luZyB3aXRoIGNvZGUuXFxuXFxuSGVyZSdzIHRoZSBwcm9jZXNzIGFuZCB0aGUgY29kZSB5b3UnbGwgbmVlZCwgYWxvbmcgd2l0aCBleHBsYW5hdGlvbnM6XFxuXFxuKipVbmRlcnN0YW5kaW5nIHRoZSBCYXNpY3MqKlxcblxcbjEuICAqKkdvb2dsZSBEcml2ZSBNb3VudGluZzoqKiBUbyBhY2Nlc3MgZmlsZXMgaW4geW91ciBHb29nbGUgRHJpdmUgd2l0aGluIGEgY29kaW5nIGVudmlyb25tZW50IChsaWtlIENvbGFiIG9yIGEgbG9jYWwgUHl0aG9uIHNjcmlwdCksIHlvdSBmaXJzdCBuZWVkIHRvIFxcXCJtb3VudFxcXCIgeW91ciBEcml2ZS4gVGhpcyBlc3RhYmxpc2hlcyBhIGNvbm5lY3Rpb24gYmV0d2VlbiB0aGUgZW52aXJvbm1lbnQgYW5kIHlvdXIgRHJpdmUgc3RvcmFnZS5cXG4yLiAgKipGaWxlIFBhdGg6KiogT25jZSBtb3VudGVkLCB5b3UgbmVlZCB0byBrbm93IHRoZSBjb3JyZWN0IGZpbGUgcGF0aCB3aXRoaW4geW91ciBHb29nbGUgRHJpdmUgdG8gdGhlIGZpbGUgeW91IHdhbnQgdG8gbG9hZC4gQXNzdW1pbmcgeW91ciBHb29nbGUgQUkgU3R1ZGlvIGZvbGRlciBpcyBhdCB0aGUgdG9wIGxldmVsIG9mIHlvdXIgRHJpdmUsIHRoZSBwYXRoIHdvdWxkIGJlIHNvbWV0aGluZyBsaWtlIGAvY29udGVudC9kcml2ZS9NeURyaXZlL0dvb2dsZSBBSSBTdHVkaW8vPHlvdXJfZmlsZV9uYW1lPmAuXFxuXFxuKipDb2RlIEV4YW1wbGUgKGZvciBHb29nbGUgQ29sYWIpOioqXFxuXFxuYGBgcHl0aG9uXFxuZnJvbSBnb29nbGUuY29sYWIgaW1wb3J0IGRyaXZlXFxuaW1wb3J0IG9zXFxuXFxuIyAxLiBNb3VudCBHb29nbGUgRHJpdmVcXG5kcml2ZS5tb3VudCgnL2NvbnRlbnQvZHJpdmUnKVxcblxcbiMgMi4gRGVmaW5lIHRoZSBmaWxlIHBhdGhcXG5maWxlX25hbWUgPSBcXFwieW91cl9maWxlX25hbWUudHh0XFxcIiAgIyBSZXBsYWNlIHdpdGggdGhlIGFjdHVhbCBmaWxlIG5hbWVcXG5maWxlX3BhdGggPSBmXFxcIi9jb250ZW50L2RyaXZlL015RHJpdmUvR29vZ2xlIEFJIFN0dWRpby97ZmlsZV9uYW1lfVxcXCJcXG5cXG5cXG4jIDMuIENoZWNrIGlmIHRoZSBmaWxlIGV4aXN0cyAob3B0aW9uYWwsIGJ1dCBnb29kIHByYWN0aWNlKVxcbmlmIG9zLnBhdGguZXhpc3RzKGZpbGVfcGF0aCk6XFxuICBwcmludChmXFxcIkZpbGUgZm91bmQgYXQ6IHtmaWxlX3BhdGh9XFxcIilcXG5lbHNlOlxcbiAgICBwcmludChmXFxcIkVycm9yOiBGaWxlIG5vdCBmb3VuZCBhdDoge2ZpbGVfcGF0aH1cXFwiKVxcbiAgICBleGl0KCkgIyBFeGl0IHRoZSBzY3JpcHQgaWYgdGhlIGZpbGUgZG9lc24ndCBleGlzdFxcblxcbiMgNC4gTG9hZCB0aGUgZmlsZSAoZXhhbXBsZSBmb3IgYSB0ZXh0IGZpbGUsIGFkanVzdCBmb3Igb3RoZXIgdHlwZXMpXFxudHJ5OlxcbiAgd2l0aCBvcGVuKGZpbGVfcGF0aCwgJ3InKSBhcyBmOlxcbiAgICAgIGNvbnRlbnQgPSBmLnJlYWQoKVxcbiAgcHJpbnQoXFxcIkZpbGUgY29udGVudDpcXFxcblxcXCIsIGNvbnRlbnQpXFxuZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOlxcbiAgcHJpbnQoZlxcXCJBbiBlcnJvciBvY2N1cnJlZCB3aGlsZSBsb2FkaW5nIHRoZSBmaWxlOiB7ZX1cXFwiKVxcbmBgYFxcblxcbioqRXhwbGFuYXRpb246KipcXG5cXG4xLiAgKipgZnJvbSBnb29nbGUuY29sYWIgaW1wb3J0IGRyaXZlYCoqOiBUaGlzIGltcG9ydHMgdGhlIG5lY2Vzc2FyeSBtb2R1bGUgdG8gaW50ZXJhY3Qgd2l0aCBHb29nbGUgRHJpdmUgaW4gQ29sYWIuXFxuMi4gICoqYGRyaXZlLm1vdW50KCcvY29udGVudC9kcml2ZScpYCoqOiBUaGlzIG1vdW50cyB5b3VyIEdvb2dsZSBEcml2ZSB0byB0aGUgc3BlY2lmaWVkIGRpcmVjdG9yeSBpbiB0aGUgQ29sYWIgZW52aXJvbm1lbnQgKGAvY29udGVudC9kcml2ZWApLiBJdCB3aWxsIHByb21wdCB5b3UgZm9yIGF1dGhvcml6YXRpb24sIGZvbGxvdyB0aGUgaW5zdHJ1Y3Rpb25zLiBBZnRlciB0aGUgZmlyc3QgbW91bnQgaXQgbWF5IG5vdCBwcm9tcHQgYWdhaW4gb24gc3Vic2VxdWVudCBydW5zIGlmIHlvdSd2ZSBncmFudGVkIHBlcm1pc3Npb25zIHByZXZpb3VzbHkuXFxuMy4gICoqYGZpbGVfbmFtZSA9IFxcXCJ5b3VyX2ZpbGVfbmFtZS50eHRcXFwiYCoqOiBDaGFuZ2UgdGhpcyB0byB0aGUgYWN0dWFsIGZpbGUgbmFtZSAoaW5jbHVkaW5nIGV4dGVuc2lvbikgb2YgdGhlIGZpbGUgeW91IHdhbnQgdG8gbG9hZCwgc3VjaCBhcyBgbXlfZGF0YS5jc3ZgLCBgbXlfbW9kZWwuaDVgLCBvciB3aGF0ZXZlciB5b3VyIGZpbGUgdHlwZSBpcy5cXG40LiAgKipgZmlsZV9wYXRoID0gLi4uYCoqOiBUaGlzIGNyZWF0ZXMgdGhlIGZ1bGwgZmlsZSBwYXRoIHVzaW5nIGFuIGYtc3RyaW5nIHRvIHB1dCB0aGUgY29tcG9uZW50cyB0b2dldGhlci4gVGhlIGBNeURyaXZlYCBkaXJlY3RvcnkgaXMgdGhlIGRlZmF1bHQgZm9yIHlvdXIgbWFpbiBEcml2ZSBmb2xkZXIuIElmIHlvdXIgR29vZ2xlIEFJIFN0dWRpbyBmb2xkZXIgaXMgaW4gYSBzdWJmb2xkZXIgb2YgeW91ciBEcml2ZSBhZGp1c3QgdGhlIHBhdGggYWNjb3JkaW5nbHkuXFxuNS4gICoqYG9zLnBhdGguZXhpc3RzKGZpbGVfcGF0aClgKio6ICBUaGlzIGxpbmUgY2hlY2tzIGlmIHRoZSBmaWxlIHlvdSd2ZSBzcGVjaWZpZWQgZXhpc3RzIGF0IHRoZSBnaXZlbiBwYXRoLiBJdCBwcmV2ZW50cyBlcnJvcnMgbGF0ZXIgb24uXFxuNi4gICoqYHdpdGggb3BlbihmaWxlX3BhdGgsICdyJykgYXMgZjpgKio6IFRoaXMgb3BlbnMgdGhlIGZpbGUgZm9yIHJlYWRpbmcuIFRoZSBgJ3InYCBtb2RlIG1lYW5zIFxcXCJyZWFkLW9ubHlcXFwiLiBcXG43LiAgKipgY29udGVudCA9IGYucmVhZCgpYCoqOiBUaGlzIHJlYWRzIHRoZSBlbnRpcmUgY29udGVudHMgb2YgdGhlIGZpbGUgaW50byB0aGUgYGNvbnRlbnRgIHZhcmlhYmxlLlxcbjguICAqKmBwcmludChjb250ZW50KWAqKjogVGhpcyBwcmludHMgdGhlIGNvbnRlbnQgdG8gdGhlIENvbGFiIG5vdGVib29rIG91dHB1dCBmb3IgeW91IHRvIGluc3BlY3QuXFxuOS4gKipgdHJ5Li4uZXhjZXB0YCoqOiBUaGlzIGF0dGVtcHRzIHRoZSBvcGVuIGFuZCByZWFkIHN0ZXBzLCBhbmQgaWYgYW55IGVycm9ycyBvY2N1ciB0aGUgYGV4Y2VwdGAgYmxvY2sgd2lsbCBjYXRjaCBpdCBhbmQgcHJpbnQgYW4gZXJyb3IgbWVzc2FnZSwgcmF0aGVyIHRoYW4gY3Jhc2hpbmcgdGhlIHByb2dyYW0uXFxuXFxuKipJbXBvcnRhbnQgQ29uc2lkZXJhdGlvbnM6KipcXG5cXG4qICAgKipGaWxlIFR5cGU6KiogVGhlIGV4YW1wbGUgY29kZSByZWFkcyBhIHRleHQgZmlsZS4gSWYgeW91IGhhdmUgYSBkaWZmZXJlbnQgdHlwZSBvZiBmaWxlIChsaWtlIGEgQ1NWLCBKU09OLCBpbWFnZSwgZXRjLiksIHlvdSdsbCBuZWVkIHRvIHVzZSB0aGUgYXBwcm9wcmlhdGUgbGlicmFyaWVzIGFuZCBtZXRob2RzIHRvIGxvYWQgaXQ6XFxuICAgICogICAqKkNTVjoqKiBVc2UgYGltcG9ydCBjc3ZgIGFuZCBgY3N2LnJlYWRlcihmKWAgb3IgYHBhbmRhcy5yZWFkX2NzdihmaWxlX3BhdGgpYC5cXG4gICAgKiAgICoqSlNPTjoqKiBVc2UgYGltcG9ydCBqc29uYCBhbmQgYGpzb24ubG9hZChmKWAuXFxuICAgICogICAqKkltYWdlczoqKiBVc2UgYFBJTGAgKFBpbGxvdykgb3IgT3BlbkNWIGxpYnJhcmllcy5cXG4gICAgKiAgICoqSERGNSAoZS5nLiwgZm9yIGEgS2VyYXMgbW9kZWwpOioqIFVzZSBgdGVuc29yZmxvdy5rZXJhcy5tb2RlbHMubG9hZF9tb2RlbChmaWxlX3BhdGgpYCBvciBgaDVweWAgbGlicmFyeS5cXG4qICAgKipQZXJtaXNzaW9uczoqKiBZb3UgbmVlZCB0byBoYXZlIHByb3BlciBwZXJtaXNzaW9ucyBmb3IgYWNjZXNzaW5nIHRoZSBmaWxlcyBpbiB5b3VyIEdvb2dsZSBEcml2ZS5cXG4qICAgKipDb2xhYiB2cy4gTG9jYWw6KiogVGhpcyBjb2RlIGlzIHNwZWNpZmljYWxseSBmb3IgQ29sYWIuIElmIHJ1bm5pbmcgbG9jYWxseSwgbW91bnRpbmcgYW5kIGdldHRpbmcgcGF0aCBtYXkgYmUgZGlmZmVyZW50LlxcblxcbioqSG93IHRvIEFkYXB0IHRoZSBDb2RlOioqXFxuXFxuMS4gICoqQ2hhbmdlIEZpbGUgTmFtZToqKiBVcGRhdGUgdGhlIGBmaWxlX25hbWVgIHZhcmlhYmxlIHRvIG1hdGNoIHlvdXIgZmlsZS5cXG4yLiAgKipBZGp1c3QgTG9hZGluZyBNZXRob2Q6KiogQ2hvb3NlIHRoZSBwcm9wZXIgbG9hZGluZyBhbmQgbWFuaXB1bGF0aW9uIG1ldGhvZHMgYmFzZWQgb24geW91ciBmaWxlIHR5cGUuXFxuMy4gICoqQ2hlY2sgRmlsZSBQYXRoOioqIFZlcmlmeSB0aGF0IHRoZSBmaWxlIHBhdGggaXMgY29ycmVjdC5cXG5cXG4qKkxldCBtZSBrbm93IHdoYXQgZmlsZSB0eXBlIHlvdSBhcmUgd29ya2luZyB3aXRoIGFuZCBpZiB5b3UgZW5jb3VudGVyIGFueSBlcnJvcnMsIGFuZCBJIGNhbiBwcm92aWRlIG1vcmUgc3BlY2lmaWMgZ3VpZGFuY2UuKipcXG4ifV19XQ==' # @param {isTemplate: true}
generation_config_b64 = 'eyJ0ZW1wZXJhdHVyZSI6MSwidG9wX3AiOjAuOTUsInRvcF9rIjo0MCwibWF4X291dHB1dF90b2tlbnMiOjgxOTJ9' # @param {isTemplate: true}
safety_settings_b64 = "e30="  # @param {isTemplate: true}

gais_contents = json.loads(base64.b64decode(contents_b64))

generation_config = json.loads(base64.b64decode(generation_config_b64))
safety_settings = json.loads(base64.b64decode(safety_settings_b64))

stream = False

# Convert and upload the files

tempfiles = pathlib.Path(f"tempfiles")
tempfiles.mkdir(parents=True, exist_ok=True)


drive = None
def upload_file_data(file_data, index):
    """Upload files to the Files API.

    For each file, Google AI Studio either sent:
    - a Google Drive ID,
    - a URL,
    - a file path, or
    - The raw bytes (`inline_data`).

    The API only understands `inline_data` or it's Files API.
    This code, uploads files to the files API where the API can access them.
    """

    mime_type = file_data["mime_type"]
    if drive_id := file_data.pop("drive_id", None):
        if drive is None:
          from google.colab import drive
          drive.mount("/gdrive")

        path = next(
            pathlib.Path(f"/gdrive/.shortcut-targets-by-id/{drive_id}").glob("*")
        )
        print("Uploading:", str(path))
        file_info = genai.upload_file(path=path, mime_type=mime_type)
        file_data["file_uri"] = file_info.uri
        return

    if url := file_data.pop("url", None):
        response = requests.get(url)
        data = response.content
        name = url.split("/")[-1]
        path = tempfiles / str(index)
        path.write_bytes(data)
        print("Uploading:", url)
        file_info = genai.upload_file(path, display_name=name, mime_type=mime_type)
        file_data["file_uri"] = file_info.uri
        return

    if name := file_data.get("filename", None):
        if not pathlib.Path(name).exists():
            raise IOError(
                f"local file: `{name}` does not exist. You can upload files "
                'to Colab using the file manager ("📁 Files" in the left '
                "toolbar)"
            )
        file_info = genai.upload_file(path, display_name=name, mime_type=mime_type)
        file_data["file_uri"] = file_info.uri
        return

    if "inline_data" in file_data:
        return

    raise ValueError("Either `drive_id`, `url` or `inline_data` must be provided.")


contents = copy.deepcopy(gais_contents)

index = 0
for content in contents:
    for n, part in enumerate(content["parts"]):
        if file_data := part.get("file_data", None):
            upload_file_data(file_data, index)
            index += 1

import json
print(json.dumps(contents, indent=4))

[
    {
        "role": "user",
        "parts": [
            {
                "text": "from google ai studio folder in drive load the file\\n"
            }
        ]
    },
    {
        "role": "model",
        "parts": [
            {
                "text": "Okay, let's break down how to load a file from your Google Drive, specifically from the Google AI Studio folder, into a Colab notebook or a similar environment where you might be working with code.\\n\\nHere's the process and the code you'll need, along with explanations:\\n\\n**Understanding the Basics**\\n\\n1.  **Google Drive Mounting:** To access files in your Google Drive within a coding environment (like Colab or a local Python script), you first need to \\\"mount\\\" your Drive. This establishes a connection between the environment and your Drive storage.\\n2.  **File Path:** Once mounted, you need to know the correct file path within your Google Drive to the file you want to load. Assuming your Google AI Studio folde

## Call `generate_content`

In [4]:
from IPython.display import display
from IPython.display import Markdown

# Call the model and print the response.
gemini = genai.GenerativeModel(model_name=model)

response = gemini.generate_content(
    contents,
    generation_config=generation_config,
    safety_settings=safety_settings,
    stream=stream,
)

display(Markdown(response.text))



<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://ai.google.dev/gemini-api/docs"><img src="https://ai.google.dev/static/site-assets/images/docs/notebook-site-button.png" height="32" width="32" />Docs on ai.google.dev</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/google-gemini/cookbook/blob/main/quickstarts"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />More notebooks in the Cookbook</a>
  </td>
</table>

## [optional] Show the conversation

This section displays the conversation received from Google AI Studio.

In [13]:
# prompt: how to chose a folder in drive

from google.colab import drive
drive.mount('/content/drive')

import os
folder_path = "/content/drive/MyDrive/Google AI Studio" #@param {type:"string"}

# Check if the folder exists
if os.path.exists(folder_path):
  print(f"Folder '{folder_path}' exists.")
  # Now you can work with files in this folder
  !ls "$folder_path"
else:
  print(f"Folder '{folder_path}' does not exist.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder '/content/drive/MyDrive/Google AI Studio' exists.
'ExampleKBARueckrufe(3).CSV'  'Loading Files From Google Drive'


In [16]:
# prompt: fix the problem with: Could not parse the CSV file

# ... (your existing code)

# Assuming the CSV file is in the same directory as the notebook
file_path = "/content/drive/MyDrive/Google AI Studio/ExampleKBARueckrufe(3).CSV"

try:
  # Try different encodings if 'latin-1' doesn't work
  # Common encodings to try include 'utf-8', 'utf-16', 'iso-8859-1', etc.
  for encoding in ['latin-1', 'utf-8', 'utf-16', 'iso-8859-1']:
    try:
      df = pd.read_csv(file_path, encoding=encoding, sep=';') # Specify separator if needed
      print(df)
      break # Exit the loop if successful
    except pd.errors.ParserError:
      print(f"Error: Could not parse the CSV file '{file_path}' with encoding '{encoding}'. Trying another encoding...")
      continue # Try the next encoding
    except Exception as e:
      print(f"An unexpected error occurred with encoding '{encoding}': {e}")
      break # Stop trying other encodings if an unexpected error occurs
  else: # This block will execute if no encoding was successful.
    print(f"Error: Could not parse the CSV file '{file_path}' with any of the tried encodings.  Please check the file's encoding and separator.")

except FileNotFoundError:
  print(f"Error: File '{file_path}' not found.")
except Exception as e:
  print(f"An unexpected error occurred: {e}")

    KBA Referenz­nummer                                 Mangelbeschreibung  \
0                  6918  Fehlerhafte Steuergeräte-Codierungen könnten u...   
1                 11719  Während eines Werkstattaufenthaltes fehlerhaft...   
2                 11819  Korrosion am Bremskraftverstärker kann im schl...   
3                 12538  Korrosion am Bremskraftverstärker kann im schl...   
4                 13376  Die Abschrankung am Gelenkbus entspricht nicht...   
..                  ...                                                ...   
175               10593  Aufgrund eines deaktivierten Kommunikationsmod...   
176               10653  Aufgrund der eintretenden Feuchtigkeit kann es...   
177               11062  Aufgrund fehlerhafter Temperaturdruckventile k...   
178               12026  Der Getriebeleitungssatz wurde nicht ordnungsg...   
179               12205  Mangelhaft befestigte 12-V-Batterie kann sich ...   

    Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bund

In [17]:
# prompt: show as table

import pandas as pd

# ... (your existing code)

# Assuming the CSV file is in the same directory as the notebook
file_path = "/content/drive/MyDrive/Google AI Studio/ExampleKBARueckrufe(3).CSV"

try:
  # Try different encodings if 'latin-1' doesn't work
  # Common encodings to try include 'utf-8', 'utf-16', 'iso-8859-1', etc.
  for encoding in ['latin-1', 'utf-8', 'utf-16', 'iso-8859-1']:
    try:
      df = pd.read_csv(file_path, encoding=encoding, sep=';') # Specify separator if needed
      # Display the DataFrame as an HTML table in the notebook
      display(df)
      break # Exit the loop if successful
    except pd.errors.ParserError:
      print(f"Error: Could not parse the CSV file '{file_path}' with encoding '{encoding}'. Trying another encoding...")
      continue # Try the next encoding
    except Exception as e:
      print(f"An unexpected error occurred with encoding '{encoding}': {e}")
      break # Stop trying other encodings if an unexpected error occurs
  else: # This block will execute if no encoding was successful.
    print(f"Error: Could not parse the CSV file '{file_path}' with any of the tried encodings.  Please check the file's encoding and separator.")

except FileNotFoundError:
  print(f"Error: File '{file_path}' not found.")
except Exception as e:
  print(f"An unexpected error occurred: {e}")

Unnamed: 0,KBA Referenz­nummer,Mangelbeschreibung,Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bundes­amt,Baujahr von,Baujahr bis,Veröffent­lichungs­datum
0,6918,Fehlerhafte Steuergeräte-Codierungen könnten u...,überwacht,2003,2016,21.03.2017
1,11719,Während eines Werkstattaufenthaltes fehlerhaft...,überwacht,2004,2015,23.03.2022
2,11819,Korrosion am Bremskraftverstärker kann im schl...,überwacht,2004,2015,01.06.2022
3,12538,Korrosion am Bremskraftverstärker kann im schl...,überwacht,2004,2015,07.02.2023
4,13376,Die Abschrankung am Gelenkbus entspricht nicht...,überwacht,2004,2022,19.12.2023
...,...,...,...,...,...,...
175,10593,Aufgrund eines deaktivierten Kommunikationsmod...,nicht überwacht,2015,2020,12.02.2021
176,10653,Aufgrund der eintretenden Feuchtigkeit kann es...,überwacht,2015,2020,14.04.2021
177,11062,Aufgrund fehlerhafter Temperaturdruckventile k...,überwacht,2015,2021,10.08.2021
178,12026,Der Getriebeleitungssatz wurde nicht ordnungsg...,überwacht,2015,2022,19.08.2022


In [22]:
# prompt: train an model on my data, the input is the colmn Mangelbeschreibung and the output is the column Überwachung der Rück..

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# ... (your existing code)

# Assuming the CSV file is in the same directory as the notebook
file_path = "/content/drive/MyDrive/Google AI Studio/ExampleKBARueckrufe(3).CSV"

try:
    # Try different encodings if 'latin-1' doesn't work
    for encoding in ['latin-1', 'utf-8', 'utf-16', 'iso-8859-1']:
        try:
            df = pd.read_csv(file_path, encoding=encoding, sep=';')
            break
        except pd.errors.ParserError:
            print(f"Error: Could not parse with encoding '{encoding}'. Trying another...")
            continue
        except Exception as e:
            print(f"An unexpected error occurred with encoding '{encoding}': {e}")
            break
    else:
        print(f"Error: Could not parse the CSV file with any of the tried encodings.")
        raise  # Re-raise the exception to stop execution

except FileNotFoundError:
    print(f"Error: File '{file_path}' not found.")
    raise
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    raise



df = df.dropna(subset=['Mangelbeschreibung', 'Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bundes­amt']) # Remove rows with missing values in these columns
X = df['Mangelbeschreibung']
y = df['Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bundes­amt']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Feature extraction using TF-IDF
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Model training (Logistic Regression)
model = LogisticRegression()
model.fit(X_train_vec, y_train)


# Model evaluation
y_pred = model.predict(X_test_vec)
print(classification_report(y_test, y_pred))

                 precision    recall  f1-score   support

nicht überwacht       0.00      0.00      0.00         8
      überwacht       0.78      1.00      0.88        28

       accuracy                           0.78        36
      macro avg       0.39      0.50      0.44        36
   weighted avg       0.60      0.78      0.68        36



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [23]:
# prompt: classify the features wich have the label überwacht and wich nicht überwacht

# ... (your existing code)

# Assuming the CSV file is in the same directory as the notebook
file_path = "/content/drive/MyDrive/Google AI Studio/ExampleKBARueckrufe(3).CSV"

try:
    # Try different encodings if 'latin-1' doesn't work
    for encoding in ['latin-1', 'utf-8', 'utf-16', 'iso-8859-1']:
        try:
            df = pd.read_csv(file_path, encoding=encoding, sep=';')
            break
        except pd.errors.ParserError:
            print(f"Error: Could not parse with encoding '{encoding}'. Trying another...")
            continue
        except Exception as e:
            print(f"An unexpected error occurred with encoding '{encoding}': {e}")
            break
    else:
        print(f"Error: Could not parse the CSV file with any of the tried encodings.")
        raise  # Re-raise the exception to stop execution

except FileNotFoundError:
    print(f"Error: File '{file_path}' not found.")
    raise
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    raise

df = df.dropna(subset=['Mangelbeschreibung', 'Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bundes­amt'])
X = df['Mangelbeschreibung']
y = df['Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bundes­amt']

# ... (your existing code)

# Classify features based on the label
supervised_features = df[df['Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bundes­amt'] == 'überwacht']['Mangelbeschreibung']
unsupervised_features = df[df['Überwachung der Rück­ruf­ak­tion durch das Kraft­fahrt-Bundes­amt'] == 'nicht überwacht']['Mangelbeschreibung']

print("Supervised Features:")
print(supervised_features)
print("\nUnsupervised Features:")
unsupervised_features

Supervised Features:
0      Fehlerhafte Steuergeräte-Codierungen könnten u...
1      Während eines Werkstattaufenthaltes fehlerhaft...
2      Korrosion am Bremskraftverstärker kann im schl...
3      Korrosion am Bremskraftverstärker kann im schl...
4      Die Abschrankung am Gelenkbus entspricht nicht...
                             ...                        
174    Aufgrund elektrochemischer Vorgänge kann es zu...
176    Aufgrund der eintretenden Feuchtigkeit kann es...
177    Aufgrund fehlerhafter Temperaturdruckventile k...
178    Der Getriebeleitungssatz wurde nicht ordnungsg...
179    Mangelhaft befestigte 12-V-Batterie kann sich ...
Name: Mangelbeschreibung, Length: 136, dtype: object

Unsupervised Features:


Unnamed: 0,Mangelbeschreibung
12,Mangelhafte Sitzkonsole und Sicherheitsgurtver...
13,Rissbildung an Bremsleitungen bedingt verlänge...
21,An Fahrzeugen mit 2-Wege-Umbau kann durch inte...
25,Mangelbehaftete Reifen können bei geringem Luf...
34,Mangelhafte Software des Airmatic Steuergeräte...
35,Die hinteren Schrauben der Fondsitzbank können...
40,Tank kann sich lösen und in den Verkehrsraum f...
54,Falsch positionierter Gierratensensor bei Fahr...
57,Lösen einer Spannungsversorgungsleitung kann z...
58,Schraubverbund zwischen Hochdruckleitung und H...
