welcome

HannahKn123 · HannahKn123 · commit 525ac0457153 · 2024-08-20T10:24:01.000+02:00
diff --git a/Datasets/student_classification_dataset.csv b/Datasets/student_classification_dataset.csv
@@ -0,0 +1,101 @@
+age,study_hours_per_week,attendance_rate,number_of_courses,assignments_completed,test_scores,passed
+24,12,69.58247563,5,9,58,0
+21,18,65.79579488,7,6,73,0
+22,27,79.57811041,1,9,84,1
+24,25,99.42601816,3,4,98,1
+20,20,69.68221086,2,9,84,0
+22,22,86.8854219,1,4,97,0
+22,28,90.46478461,2,6,85,1
+24,30,69.50550176,7,8,67,1
+19,29,89.12865394,6,4,98,0
+20,33,74.71132531,2,0,88,0
+24,19,85.29223322,3,9,81,1
+20,5,85.34118843,6,9,73,0
+20,29,81.43098736,2,0,72,0
+22,11,63.6115908,7,1,81,0
+21,13,93.41209982,6,5,86,1
+20,28,72.8312026,7,8,61,1
+23,5,67.46074042,2,7,98,1
+22,12,61.63100566,7,4,62,1
+19,28,83.63571773,3,0,72,1
+21,15,87.10257447,2,6,74,1
+23,21,60.66351316,2,4,84,1
+23,12,80.48372233,2,5,90,0
+19,39,69.05983101,1,6,79,0
+21,39,85.80691162,1,2,66,0
+22,37,66.97465716,1,9,98,0
+18,9,87.63750952,3,2,69,0
+21,32,75.46941385,6,4,97,0
+19,11,97.46919955,7,5,74,0
+23,13,65.50083777,5,8,71,1
+22,12,73.64265404,2,4,62,1
+21,16,64.53894085,7,0,68,0
+18,38,96.98774473,2,3,98,0
+18,37,95.09357414,3,4,85,1
+20,27,70.31766511,2,9,61,0
+20,28,86.39936184,1,9,90,0
+24,39,92.68888801,5,4,68,1
+19,26,82.20803246,4,6,61,0
+21,31,81.18602313,2,3,58,1
+21,39,69.67409164,7,0,56,0
+24,5,63.72411071,1,4,77,1
+23,39,95.88863032,4,6,63,1
+23,18,96.01672229,5,9,80,1
+24,7,85.32405829,4,9,68,0
+23,5,73.56119164,1,5,96,1
+20,9,73.96838298,6,4,65,0
+21,30,89.03822715,6,3,54,0
+24,18,95.8844104,4,1,84,0
+21,31,95.48345697,3,3,61,0
+18,13,91.19502183,7,9,74,1
+20,19,85.68126585,7,9,70,1
+22,19,63.3655986,4,2,85,0
+20,30,66.46514856,2,9,72,0
+24,17,95.94216754,2,0,65,1
+22,36,84.25716239,7,7,88,0
+18,36,60.36788206,7,4,94,1
+24,8,64.05886171,6,3,91,0
+19,34,86.54007076,3,7,88,0
+21,27,60.20246335,1,6,63,1
+18,19,66.43232206,2,1,80,0
+21,33,81.94935157,7,0,54,1
+23,17,87.67580791,6,3,84,0
+19,36,86.07845038,5,7,72,0
+19,11,68.97077238,6,1,78,0
+18,26,88.48716885,2,2,92,1
+19,32,69.4899635,2,0,60,0
+22,6,73.01598793,6,0,67,0
+19,10,89.8596562,1,2,96,0
+21,32,85.98531596,4,4,61,0
+21,32,93.96893642,2,2,58,1
+24,24,86.30451569,6,0,59,1
+21,34,82.73234413,7,0,93,0
+24,15,63.74699071,6,7,66,1
+21,32,74.70863212,3,9,87,0
+22,29,70.60809471,4,1,56,0
+24,37,69.75958574,5,2,95,0
+20,5,98.92042219,1,1,62,1
+23,31,75.72390899,5,2,89,1
+18,17,95.68186221,4,6,91,1
+21,7,85.24554504,4,0,58,1
+19,10,91.79245214,4,9,99,1
+21,12,80.10548372,5,7,76,1
+19,31,83.07615539,7,9,51,1
+23,13,79.70070775,4,9,54,1
+23,37,67.80971951,6,9,78,1
+23,28,88.89808461,5,1,86,0
+19,19,71.2308945,4,2,87,1
+21,36,60.97263866,6,8,68,1
+23,36,85.81889184,7,6,57,1
+22,28,67.08442718,3,3,97,1
+24,16,97.61834337,7,9,94,1
+19,6,98.15714308,4,4,50,1
+19,7,96.59457561,5,1,71,1
+21,21,74.80634801,2,7,66,1
+19,6,60.61826466,4,3,56,1
+19,6,97.1327425,2,8,74,1
+23,32,77.12736593,6,4,94,1
+21,27,98.66619276,3,8,53,0
+23,36,98.54479908,1,3,85,0
+24,37,94.12037822,7,9,55,1
+24,5,71.77795568,3,4,80,1
diff --git a/core/chapters/c11_Sklearn.py b/core/chapters/c11_Sklearn.py
@@ -518,6 +518,289 @@ def program(self):
 
 
 
+class PracticeSklearn(Page):
+    title = "Practice: Train your own model"
+
+    class loadDataset(VerbatimStep):
+        """
+    Now it is time to practice what you just learned with a new dataset.
+    The dataset has been adjusted to reflect a scenario involving students who either pass or fail a test.
+    The features include age, study hours per week, attendance rate, number of courses taken, assignments completed, test scores, and a target variable indicating whether a student passed the test.
+    Load the dataset and print the first few lines to get an impression.
+    To directly apply your knowledge, replace the “?” with the correct code.
+
+        __copyable__
+        import numpy as np
+        import pandas as pd
+        from sklearn.model_selection import train_test_split
+        from sklearn.preprocessing import StandardScaler
+        from sklearn.ensemble import RandomForestClassifier
+        from sklearn.metrics import accuracy_score
+        from sklearn.model_selection import cross_val_score
+        from sklearn.model_selection import GridSearchCV
+        import pyodide_http
+
+        pyodide_http.patch_all()  # Necessary for downloading
+
+        # Load data using pandas
+        data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+        # Print the first few lines
+        print(data.?())
+        """
+
+        requirements = "hints"
+        hints = """ test """
+
+        def program(self):
+            import numpy as np
+            import pandas as pd
+            from sklearn.model_selection import train_test_split
+            from sklearn.preprocessing import StandardScaler
+            from sklearn.ensemble import RandomForestClassifier
+            from sklearn.metrics import accuracy_score
+            from sklearn.model_selection import cross_val_score
+            from sklearn.model_selection import GridSearchCV
+            import pyodide_http
+
+            pyodide_http.patch_all()  # Necessary for downloading
+
+            # Load data using pandas
+            data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+            # Print the first few lines
+            print(data.head())
+
+        program_in_text = False
+
+    class TrainModel(VerbatimStep):
+        """
+    Separate features and target variable (here: variable "passed", which indicates whether a student passed the test).
+    Split your data into training and test sets using train_test_split. Use 80% of the data as training data.
+    To directly apply your knowledge, replace the “?” with the correct code.
+
+        __copyable__
+        import numpy as np
+        import pandas as pd
+        from sklearn.model_selection import train_test_split
+        from sklearn.preprocessing import StandardScaler
+        from sklearn.ensemble import RandomForestClassifier
+        from sklearn.metrics import accuracy_score
+        from sklearn.model_selection import cross_val_score
+        from sklearn.model_selection import GridSearchCV
+        import pyodide_http
+
+        pyodide_http.patch_all()  # Necessary for downloading
+
+        # Load data using pandas
+        data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+        # Separate features and target variable
+        X = data.drop(?, axis=1)
+        y = data[?]
+
+        # Splitting the data into training and testing sets
+        X_train, X_test, y_train, y_test = train_test_split(?, ?, test_size=?, random_state=42)
+
+        """
+
+        requirements = "hints"
+        hints = """ test """
+
+        def program(self):
+            import numpy as np
+            import pandas as pd
+            from sklearn.model_selection import train_test_split
+            from sklearn.preprocessing import StandardScaler
+            from sklearn.ensemble import RandomForestClassifier
+            from sklearn.metrics import accuracy_score
+            from sklearn.model_selection import cross_val_score
+            from sklearn.model_selection import GridSearchCV
+            import pyodide_http
+
+            pyodide_http.patch_all()  # Necessary for downloading
+
+            # Load data using pandas
+            data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+            # Separate features and target variable
+            X = data.drop('Change', axis=1)
+            y = data['Change']
+
+            # Splitting the data into training and testing sets
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+        program_in_text = False
+
+
+    class TrainModel(VerbatimStep):
+        """
+    Now train a simple random forest. Before you do so, preprocess your data using StandardScaler.
+    To directly apply your knowledge, replace the “?” with the correct code.
+
+        __copyable__
+        import pandas as pd
+        from sklearn.model_selection import train_test_split
+        from sklearn.preprocessing import StandardScaler
+        from sklearn.ensemble import RandomForestClassifier
+        from sklearn.metrics import accuracy_score
+        from sklearn.model_selection import cross_val_score
+        from sklearn.model_selection import GridSearchCV
+        import pyodide_http
+
+        pyodide_http.patch_all()  # Necessary for downloading
+
+        # Load data using pandas
+        data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+        # Separate features and target variable
+        X = data.drop('Change', axis=1)
+        y = data['Change']
+
+        # Splitting the data into training and testing sets
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+        # Standardizing the features (optional for Random Forest, but we'll keep it for consistency)
+        scaler = StandardScaler()
+        X_train = scaler.fit_transform(?)
+        X_test = scaler.transform(?)
+
+        # Training a Random Forest model
+        model = RandomForestClassifier(random_state=42)
+        model.fit(?, ?)
+        """
+
+        requirements = "hints"
+        hints = """ test """
+
+        def program(self):
+            import numpy as np
+            import pandas as pd
+            from sklearn.model_selection import train_test_split
+            from sklearn.preprocessing import StandardScaler
+            from sklearn.ensemble import RandomForestClassifier
+            from sklearn.metrics import accuracy_score
+            from sklearn.model_selection import cross_val_score
+            from sklearn.model_selection import GridSearchCV
+            import pyodide_http
+
+            pyodide_http.patch_all()  # Necessary for downloading
+
+            # Load data using pandas
+            data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+            # Separate features and target variable
+            X = data.drop('Change', axis=1)
+            y = data['Change']
+
+            # Splitting the data into training and testing sets
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+            # Standardizing the features (optional for Random Forest, but we'll keep it for consistency)
+            scaler = StandardScaler()
+            X_train = scaler.fit_transform(X_train)
+            X_test = scaler.transform(X_test)
+
+            # Training a Random Forest model
+            model = RandomForestClassifier(random_state=42)
+            model.fit(X_train, y_train)
+
+
+        program_in_text = False
+
+    class TrainModel(VerbatimStep):
+        """
+    Now you can make predictions on the test set and evaluate the performance of your model using metrics like accuracy.
+    To directly apply your knowledge, replace the “?” with the correct code.
+
+        __copyable__
+        import numpy as np
+        import pandas as pd
+        from sklearn.model_selection import train_test_split
+        from sklearn.preprocessing import StandardScaler
+        from sklearn.ensemble import RandomForestClassifier
+        from sklearn.metrics import accuracy_score
+        from sklearn.model_selection import cross_val_score
+        from sklearn.model_selection import GridSearchCV
+        import pyodide_http
+
+        pyodide_http.patch_all()  # Necessary for downloading
+
+        # Load data using pandas
+        data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+        # Separate features and target variable
+        X = data.drop('Change', axis=1)
+        y = data['Change']
+
+        # Splitting the data into training and testing sets
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+        # Standardizing the features (optional for Random Forest, but we'll keep it for consistency)
+        scaler = StandardScaler()
+        X_train = scaler.fit_transform(X_train)
+        X_test = scaler.transform(X_test)
+
+        # Training a Random Forest model
+        model = RandomForestClassifier(random_state=42)
+        model.fit(X_train, y_train)
+
+        # Making predictions
+        y_pred = model.predict(?)
+
+        # Evaluating the model
+        accuracy = accuracy_score(?, ?)
+        print(f'Accuracy: {accuracy * 100:.2f}%')
+
+        """
+
+        requirements = "hints"
+        hints = """ test """
+
+        def program(self):
+            import numpy as np
+            import pandas as pd
+            from sklearn.model_selection import train_test_split
+            from sklearn.preprocessing import StandardScaler
+            from sklearn.ensemble import RandomForestClassifier
+            from sklearn.metrics import accuracy_score
+            from sklearn.model_selection import cross_val_score
+            from sklearn.model_selection import GridSearchCV
+            import pyodide_http
+
+            pyodide_http.patch_all()  # Necessary for downloading
+
+            # Load data using pandas
+            data = pd.read_csv('https://raw.githubusercontent.com/aoberm/futurecoder/master/Datasets/Customer.csv')
+
+            # Separate features and target variable
+            X = data.drop('Change', axis=1)
+            y = data['Change']
+
+            # Splitting the data into training and testing sets
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+            # Standardizing the features (optional for Random Forest, but we'll keep it for consistency)
+            scaler = StandardScaler()
+            X_train = scaler.fit_transform(X_train)
+            X_test = scaler.transform(X_test)
+
+            # Training a Random Forest model
+            model = RandomForestClassifier(random_state=42)
+            model.fit(X_train, y_train)
+
+            # Making predictions
+            y_pred = model.predict(X_test)
+
+            # Evaluating the model
+            accuracy = accuracy_score(y_test, y_pred)
+            print(f'Accuracy: {accuracy * 100:.2f}%')
+
+        program_in_text = False
+
+    final_text = """
+        Good job! You have managed to train your own model. Now you can apply your knowledge to various data analysis tasks!
+    """
 
 
 
diff --git a/core/chapters/c12_Pandas.py b/core/chapters/c12_Pandas.py