# 1) Datensatz herunterladen

## 1.1 Mount Google Drive & Create necessary folder structures

In [1]:
from google.colab import drive
import os

# Define the folder paths
main_folder_path = '/content/drive/MyDrive/wine_project'
raw_data_folder_path = os.path.join(main_folder_path, 'raw_data')
processed_data_folder_path = os.path.join(main_folder_path, 'processed_data')
training_data_folder_path = os.path.join(main_folder_path, 'training_data')
models_folder_path = os.path.join(main_folder_path, 'models')
plots_folder_path = os.path.join(main_folder_path, 'plots')

# Folders for plot data
exploration_plots_folder_path = os.path.join(plots_folder_path,'exploration')
analysis_plots_folder_path = os.path.join(plots_folder_path,'analysis')
features_plots_folder_path = os.path.join(plots_folder_path,'features')
model_plots_folder_path = os.path.join(plots_folder_path,'models')


# Mount Google Drive
drive.mount('/content/drive', force_remount=True)

# Create the main folder if it doesn't exist
os.makedirs(main_folder_path, exist_ok=True)

# Create the 'raw_data' folder if it doesn't exist
os.makedirs(raw_data_folder_path, exist_ok=True)

# Create the 'processed_data' folder if it doesn't exist
os.makedirs(processed_data_folder_path, exist_ok=True)

# Create the 'training_data' folder if it doesn't exist
os.makedirs(training_data_folder_path, exist_ok=True)

# Create the 'models' folder if it doesn't exist
os.makedirs(models_folder_path, exist_ok=True)

# Create the 'plots' folder if it doesn't exist and it's subfolders
os.makedirs(plots_folder_path, exist_ok=True)
os.makedirs(exploration_plots_folder_path, exist_ok=True)
os.makedirs(analysis_plots_folder_path, exist_ok=True)
os.makedirs(features_plots_folder_path, exist_ok=True)
os.makedirs(model_plots_folder_path, exist_ok=True)

# Confirm folder creation
print(f"Main folder is ready at: {main_folder_path}")
print(f"'raw_data' folder is ready at: {raw_data_folder_path}")
print(f"'processed_data' folder is ready at: {processed_data_folder_path}")
print(f"'training_data' folder is ready at: {training_data_folder_path}")
print(f"'models' folder is ready at: {models_folder_path}")
print(f"'plots' folder is ready at: {plots_folder_path}")
print(f"'exploration' folder is ready at: {exploration_plots_folder_path}")
print(f"'analysis' folder is ready at: {analysis_plots_folder_path}")
print(f"'features' folder is ready at: {features_plots_folder_path}")
print(f"'model plots' folder is ready at: {model_plots_folder_path}")


Mounted at /content/drive
Main folder is ready at: /content/drive/MyDrive/wine_project
'raw_data' folder is ready at: /content/drive/MyDrive/wine_project/raw_data
'processed_data' folder is ready at: /content/drive/MyDrive/wine_project/processed_data
'training_data' folder is ready at: /content/drive/MyDrive/wine_project/training_data
'models' folder is ready at: /content/drive/MyDrive/wine_project/models
'plots' folder is ready at: /content/drive/MyDrive/wine_project/plots
'exploration' folder is ready at: /content/drive/MyDrive/wine_project/plots/exploration
'analysis' folder is ready at: /content/drive/MyDrive/wine_project/plots/analysis
'features' folder is ready at: /content/drive/MyDrive/wine_project/plots/features
'model plots' folder is ready at: /content/drive/MyDrive/wine_project/plots/models


## 1.2 Import Kaggle.json for API

Das kaggle.json sollte vor dem ausführen vorhanden sein. Es kann mit dem "Choose Files" Button hochgeladen werden.

In [2]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle1.json to kaggle1.json
User uploaded file "kaggle1.json" with length 69 bytes
mv: cannot stat 'kaggle.json': No such file or directory


## 1.3 Datensatz herunterladen

In [3]:
import shutil
import zipfile

# Download the dataset from Kaggle
!kaggle datasets download -d ruthgn/wine-quality-data-set-red-white-wine

# Move the downloaded zip file to the 'raw_data' folder
shutil.move('wine-quality-data-set-red-white-wine.zip', raw_data_folder_path)

# Unzip the file in the 'raw_data' folder
with zipfile.ZipFile(os.path.join(raw_data_folder_path, 'wine-quality-data-set-red-white-wine.zip'), 'r') as zip_ref:
    zip_ref.extractall(raw_data_folder_path)

# Confirm that the file is moved and unzipped
print(f"Dataset has been moved and unzipped to: {raw_data_folder_path}")


Dataset URL: https://www.kaggle.com/datasets/ruthgn/wine-quality-data-set-red-white-wine
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading wine-quality-data-set-red-white-wine.zip to /content
  0% 0.00/98.0k [00:00<?, ?B/s]
100% 98.0k/98.0k [00:00<00:00, 66.2MB/s]
Dataset has been moved and unzipped to: /content/drive/MyDrive/wine_project/raw_data
