
# Programming and Data Science Snippets

This Jupyter Notebook serves as a reference for important code snippets in various programming languages, with a focus on data science applications. Below are categorized examples of common tasks and techniques used in programming, data manipulation, visualization, and machine learning.

## Table of Contents

1. [Python Basics](#python-basics)
   - [Hello, World!](#hello-world)
   - [Variables and Data Types](#variables-and-data-types)
   - [Control Flow (if-else)](#control-flow-if-else)
   - [Loops (for and while)](#loops-for-and-while)
   - [Functions](#functions)
   - [File Handling](#file-handling)
   - [Exception Handling](#exception-handling)
   - [List Comprehensions](#list-comprehensions)
   - [Classes and Objects](#classes-and-objects)

2. [Data Science with Python](#data-science-with-python)
   - [Data Manipulation with Pandas](#data-manipulation-with-pandas)
   - [Data Visualization with Matplotlib](#data-visualization-with-matplotlib)
   - [Data Visualization with Seaborn](#data-visualization-with-seaborn)
   - [Loading and Handling Dates with Pandas](#loading-and-handling-dates-with-pandas)
   - [Machine Learning with Scikit-Learn](#machine-learning-with-scikit-learn)
   - [Working with NumPy Arrays](#working-with-numpy-arrays)

In [None]:
1. Hello, World!

In [None]:
# Write the classic greeting to standard output.
print("Hello, World!")


In [None]:
2. Variables and Data Types


# Variable assignment


In [None]:
# Bind the name x to the integer 10.
x = 10


# Data types


In [None]:
integer_var = 5                # int: a whole number
float_var = 3.14               # float: a number with a fractional part
string_var = "Hello, Python!"  # str: text

# Lists


In [None]:
# list literal holding five integers
my_list = [1, 2, 3, 4, 5]


# Dictionaries


In [None]:
# dict literal mapping two string keys to string values
my_dict = {"key": "value", "name": "John"}

3. Control Flow (if-else)


# if-else statement


In [None]:
# Report whether x is above the threshold of 5.
x = 10
if x <= 5:
    print("x is not greater than 5")
else:
    print("x is greater than 5")

4. Loops (for and while)


# for loop


In [None]:
# range(5) yields 0, 1, 2, 3, 4, so the body runs five times.
for i in range(5):
    print(i)



# while loop

# Start at 0; the loop below prints 0 through 4.
counter = 0
while counter < 5:
    print(counter)
    counter += 1  # advance the counter so the loop condition eventually fails

5. Functions


# Function definition


In [None]:
def greet(name):
    """Build a greeting for *name*.

    Args:
        name: Value interpolated into the greeting text.

    Returns:
        The string ``"Hello, <name>!"``.
    """
    greeting = "Hello, {}!".format(name)
    return greeting

# Function call


In [None]:
# Call greet with "Alice" and print the string it returns.
result = greet("Alice")
print(result)

6. File Handling


# Writing to a file


In [None]:
# Create (or truncate) example.txt and write one piece of text to it.
# The encoding is stated explicitly so the bytes on disk are the same on
# every platform instead of depending on the locale's default encoding.
with open("example.txt", "w", encoding="utf-8") as file:
    file.write("Hello, File!")


# Reading from a file


In [None]:
# Read the whole of example.txt back as text. The encoding is stated
# explicitly so decoding does not depend on the locale's default encoding.
with open("example.txt", "r", encoding="utf-8") as file:
    content = file.read()

# Printing happens after the with-block, once the file has been closed.
print(content)

7. Exception Handling


# try-except block


In [None]:
try:
    result = 10 / 0  # dividing by zero raises ZeroDivisionError, so this assignment never completes
except ZeroDivisionError:
    # Only the specific error raised by the division is handled.
    print("Cannot divide by zero!")

8. List Comprehensions


In [None]:
# Squares of the integers 1 through 5, built with a list comprehension.
squares = [n * n for n in range(1, 6)]
print(squares)

9. Classes and Objects


# Class definition


In [None]:
class Dog:
    """A dog with a name and an age that can bark."""

    def __init__(self, name, age):
        """Store *name* and *age* on the new instance."""
        self.name, self.age = name, age

    def bark(self):
        """Print the dog's bark to standard output."""
        print("Woof!")

# Object instantiation


In [None]:
# Create a Dog named "Buddy" aged 3, then call its bark method.
my_dog = Dog("Buddy", 3)
my_dog.bark()

## Data Science with Python

1. Data Manipulation with Pandas


In [None]:
import pandas as pd

# Creating a DataFrame from a dict of equal-length columns
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'San Francisco', 'Los Angeles'],
}
df = pd.DataFrame(data)

# Displaying the DataFrame
print(df)

# Selecting a single column returns a Series
ages = df['Age']

# Filtering rows with a boolean mask
filtered_data = df[df['Age'] > 30]

# Grouping data and averaging each group.
# numeric_only=True is required because 'Name' holds text: pandas 2.x raises
# TypeError when asked to average a non-numeric column instead of silently
# dropping it as older versions did.
grouped_data = df.groupby('City').mean(numeric_only=True)

2. Data Visualization with Matplotlib


In [None]:
import matplotlib.pyplot as plt

# Line plot: four points joined by straight segments
plt.plot([1, 2, 3, 4], [10, 15, 25, 30])
plt.title('Simple Line Plot')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.show()

# Scatter plot: one marker per DataFrame row, Age against City
plt.scatter(x=df['Age'], y=df['City'])
plt.title('Scatter Plot')
plt.xlabel('Age')
plt.ylabel('City')
plt.show()


3. Data Visualization with Seaborn


In [None]:
import seaborn as sns

# Pair plot for exploring relationships in a DataFrame
sns.pairplot(df)
plt.show()

# Heatmap for visualizing correlations.
# numeric_only=True restricts the correlation to numeric columns; without it
# pandas 2.x raises ValueError on the text columns ('Name', 'City').
correlation_matrix = df.corr(numeric_only=True)
sns.heatmap(correlation_matrix, annot=True, cmap='viridis')
plt.show()


4. Loading and Handling Dates with Pandas


In [None]:
# Load the CSV and parse 'date_column' as datetimes while reading
date_df = pd.read_csv('data.csv', parse_dates=['date_column'])

# Add year, month and day columns taken from each parsed date
date_df = date_df.assign(
    year=lambda frame: frame['date_column'].dt.year,
    month=lambda frame: frame['date_column'].dt.month,
    day=lambda frame: frame['date_column'].dt.day,
)


5. Machine Learning with Scikit-Learn


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# NOTE(review): X (features) and y (target) are not defined anywhere in the
# visible notebook; they must be created before this cell is run.

# Splitting data into training and testing sets
# test_size=0.2 holds out 20% of the rows; random_state=42 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating and training a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Making predictions on the held-out rows
predictions = model.predict(X_test)

# Evaluating the model: mean squared error between true and predicted targets
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')


6. Working with NumPy Arrays


In [None]:
import numpy as np

# One-dimensional integer array
arr = np.array([1, 2, 3, 4, 5])

# Element-wise square and total of the array
arr_squared = arr ** 2
arr_sum = arr.sum()


7. EDA with Pandas

In [None]:
# Summary statistics of the DataFrame
summary_statistics = df.describe()

# Number of missing values per column
missing_values = df.isna().sum()

# Rows ordered by Age, largest first
sorted_data = df.sort_values('Age', ascending=False)

# Rows whose City is one of the two listed values
filtered_data = df.loc[df['City'].isin(['New York', 'San Francisco'])]

# New column holding the square of each Age, computed with apply
df['Age_squared'] = df['Age'].apply(lambda age: age ** 2)


8. Data Cleaning and Preprocessing


In [None]:
# Handling missing values (filling with mean).
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on df['Age']: the in-place form works on an
# intermediate object, which emits a FutureWarning on pandas 2.2 and leaves
# df unchanged when copy-on-write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].mean())

# Dropping duplicate rows
df.drop_duplicates(inplace=True)

# One-hot encoding categorical variables: one 'City_<value>' column per city
df_encoded = pd.get_dummies(df, columns=['City'], prefix='City')


9. Data Visualization with Plotly


In [None]:
import plotly.express as px

# Scatter of Age against City; marker colour follows Name, marker size follows Age
fig = px.scatter(
    df,
    x='Age',
    y='City',
    size='Age',
    color='Name',
    title='Scatter Plot with Plotly',
)
fig.show()


10. Time Series Analysis with Pandas


In [None]:
# Setting the DataFrame index to a datetime column
date_df.set_index('date_column', inplace=True)

# Resampling time series data to month-end buckets and averaging each bucket.
# An offset object is used instead of the string alias 'M', which newer pandas
# releases deprecate in favour of 'ME'; the object form works on both.
monthly_resampled = date_df.resample(pd.offsets.MonthEnd()).mean()

# Plotting time series data
date_df.plot(title='Time Series Data')


11. Working with Large Datasets using Dask


In [None]:
import dask.dataframe as dd

# Reading a large CSV file with Dask
dask_df = dd.read_csv('large_data.csv')

# Performing computations on a Dask DataFrame
# .mean() only describes the computation; .compute() runs it and returns the value.
# NOTE(review): assumes large_data.csv has an 'Age' column; confirm against the file.
mean_age = dask_df['Age'].mean().compute()


12. Statistical Analysis with SciPy


In [None]:
from scipy import stats

# Independent two-sample t-test between two DataFrame columns.
# NOTE(review): 'Group_A' and 'Group_B' are not created by any visible cell;
# confirm the columns exist before running.
t_stat, p_value = stats.ttest_ind(a=df['Group_A'], b=df['Group_B'])


13. Feature Scaling and Normalization


In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Columns rescaled by both steps below
feature_columns = ['Feature1', 'Feature2']

# Standardize numerical features
scaler = StandardScaler()
df[feature_columns] = scaler.fit_transform(df[feature_columns])

# Normalize numerical features (applied to the already-standardized values,
# because the step above overwrote the same columns)
minmax_scaler = MinMaxScaler()
df[feature_columns] = minmax_scaler.fit_transform(df[feature_columns])


14. Handling Imbalanced Classes in Machine Learning


In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Split FIRST so the held-out rows are real observations only. Oversampling
# before the split lets synthetic points interpolated from test rows end up
# in the training set, which inflates evaluation scores (data leakage).
# stratify=y keeps the class proportions the same in both parts.
X_train_imbalanced, X_test_resampled, y_train_imbalanced, y_test_resampled = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Handling imbalanced classes using SMOTE on the training part only.
# random_state makes the synthetic samples repeatable.
X_resampled, y_resampled = SMOTE(random_state=42).fit_resample(
    X_train_imbalanced, y_train_imbalanced
)

# Names kept from the earlier version of this snippet. The "*_test_resampled"
# names hold the untouched test split; the test set is never resampled.
X_train_resampled, y_train_resampled = X_resampled, y_resampled


15. Interactive Data Exploration with Plotly Dash


In [None]:
import dash
# dcc and html are part of the dash package since Dash 2.0; the standalone
# dash_core_components / dash_html_components packages are deprecated.
from dash import dcc, html

# Dash application example
app = dash.Dash(__name__)

# Page layout: a heading followed by one scatter graph of Age against City
app.layout = html.Div(children=[
    html.H1(children='Interactive Data Exploration'),

    dcc.Graph(
        id='scatter-plot',
        figure={
            'data': [
                {'x': df['Age'], 'y': df['City'], 'type': 'scatter', 'mode': 'markers'}
            ],
            'layout': {
                'title': 'Interactive Scatter Plot'
            }
        }
    )
])

if __name__ == '__main__':
    # app.run replaces the deprecated app.run_server
    app.run(debug=True)


16. Advanced Machine Learning Topics


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Candidate values tried for each hyperparameter (3 x 3 combinations)
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
}

# 5-fold cross-validated search over the grid, fitted on the training split
grid_search = GridSearchCV(estimator=RandomForestClassifier(), param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Best-scoring parameter combination found by the search
best_params = grid_search.best_params_


17. Web Scraping with BeautifulSoup and Requests


In [None]:
import requests
from bs4 import BeautifulSoup

# Scraping a website
url = 'https://example.com'
# requests waits indefinitely by default; a timeout stops a stalled server
# from hanging the notebook.
response = requests.get(url, timeout=10)
# Raise on 4xx/5xx responses instead of parsing an error page as content.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')


18. Working with APIs


In [None]:
import requests

# Making an API request
api_url = 'https://api.example.com/data'
# requests waits indefinitely by default; a timeout stops a stalled server
# from hanging the notebook.
response = requests.get(api_url, timeout=10)
# Raise on 4xx/5xx responses so an error body is not decoded as data.
response.raise_for_status()
# Decode the JSON response body into Python objects
data = response.json()


19. Regular Expressions


In [None]:
import re

# Pattern for three digits, two digits and four digits separated by hyphens,
# bounded by word boundaries (example: Social Security Number)
pattern = r'\b\d{3}-\d{2}-\d{4}\b'
text = '123-45-6789'

# Try the pattern at the start of the text; None means no match
match = re.compile(pattern).match(text)


20. Data Serialization (JSON, Pickle)


In [None]:
import json
import pickle

# JSON text for a one-entry dict
json_data = json.dumps(dict(key='value'))

# Pickle bytes for the same dict
pickle_data = pickle.dumps(dict(key='value'))


# R Basics and Data Science Snippets
The following sections cover fundamental R concepts and practical examples for data manipulation, visualization, and statistical analysis.

## Table of Contents

1. [Introduction to R](#introduction-to-r)
2. [Basic Data Manipulation with `dplyr`](#basic-data-manipulation-with-dplyr)
3. [Data Visualization with `ggplot2`](#data-visualization-with-ggplot2)
4. [Loading and Handling Dates with `lubridate`](#loading-and-handling-dates-with-lubridate)
5. [Statistical Analysis with `stats`](#statistical-analysis-with-stats)
6. [Machine Learning with `caret`](#machine-learning-with-caret)
7. [Working with Large Datasets using `data.table`](#working-with-large-datasets-using-data.table)


1. Basic Data Manipulation with dplyr

In [None]:
# Install dplyr only when it is missing, then attach it.
# An unconditional install.packages() call would reinstall the package (and
# need network access) every time the cell is run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Creating a data frame
df <- data.frame(
  Name = c("Alice", "Bob", "Charlie"),
  Age = c(25, 30, 35),
  City = c("New York", "San Francisco", "Los Angeles")
)

# Displaying the data frame
print(df)

# Selecting columns
selected_cols <- select(df, Name, Age)

# Filtering data: keep rows where Age is above 30
filtered_data <- filter(df, Age > 30)

# Grouping data: mean Age per City
grouped_data <- df %>% group_by(City) %>% summarise(mean_age = mean(Age))


2. Data Visualization with ggplot2

# Install ggplot2 only when it is missing, then attach it.
# An unconditional install.packages() call would reinstall the package (and
# need network access) every time the cell is run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Creating a scatter plot: Age against City, coloured by Name, sized by Age
ggplot(df, aes(x = Age, y = City, color = Name, size = Age)) +
  geom_point() +
  labs(title = "Scatter Plot with ggplot2")

# Creating a bar plot: row count per City, filled by Name
ggplot(df, aes(x = City, fill = Name)) +
  geom_bar() +
  labs(title = "Bar Plot with ggplot2")


3. Loading and Handling Dates with lubridate

# Install lubridate only when it is missing, then attach it.
# An unconditional install.packages() call would reinstall the package (and
# need network access) every time the cell is run.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
library(lubridate)

# Creating a Date object from a year-month-day string
date <- ymd("2023-01-01")

# Extracting components of a date
year_val <- year(date)
month_val <- month(date)
day_val <- day(date)


4. Statistical Analysis with stats

# Two-sample t-test comparing the means of two numeric vectors
group_a <- c(23, 25, 28, 30, 32)
group_b <- c(19, 20, 22, 24, 25)

t_test_result <- t.test(x = group_a, y = group_b)
print(t_test_result)


5. Machine Learning with caret

# Install caret only when it is missing, then attach it.
# An unconditional install.packages() call would reinstall the package (and
# need network access) every time the cell is run.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
library(caret)

# Splitting data into training and testing sets
# set.seed makes the random partition repeatable; p = 0.8 requests 80% for training.
set.seed(123)
split_index <- createDataPartition(df$Age, p = 0.8, list = FALSE)
train_data <- df[split_index, ]
test_data <- df[-split_index, ]

# Training a linear regression model: Age predicted from all other columns
model <- train(Age ~ ., data = train_data, method = "lm")

# Making predictions on the held-out rows
predictions <- predict(model, newdata = test_data)


6. Working with Large Datasets using data.table

# Install data.table only when it is missing, then attach it.
# An unconditional install.packages() call would reinstall the package (and
# need network access) every time the cell is run.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)

# Creating a data.table from the existing data frame
dt <- data.table(df)

# Filtering and grouping with data.table
filtered_dt <- dt[Age > 30]                             # rows where Age is above 30
grouped_dt <- dt[, .(mean_age = mean(Age)), by = City]  # mean Age per City


# SQL for Data Analysis

The following includes SQL code snippets commonly used in data analysis tasks.

## Table of Contents

1. [Connecting to a Database](#connecting-to-a-database)
2. [Querying Data](#querying-data)
3. [Filtering and Sorting](#filtering-and-sorting)
4. [Aggregation](#aggregation)
5. [Joining Tables](#joining-tables)
6. [Creating a Table](#creating-a-table)
7. [Inserting Data into a Table](#inserting-data-into-a-table)
8. [Updating Data in a Table](#updating-data-in-a-table)
9. [Deleting Data from a Table](#deleting-data-from-a-table)
10. [Indexing a Column](#indexing-a-column)
11. [Aggregating Data with GROUP BY](#aggregating-data-with-group-by)
12. [Subqueries](#subqueries)
13. [Case Statements](#case-statements)
14. [Window Functions](#window-functions)



1. Connecting to a Database

Connecting to SQLite Database


In [None]:
-- Attaching a SQLite database file to the current connection
-- Replace 'your_database.db' with the actual SQLite database file path
-- The attached database is given the schema name db
ATTACH DATABASE 'your_database.db' AS db;


Connecting to MySQL Database


In [None]:
-- Selecting the default database on a MySQL connection
-- Replace 'your_database' with the database name. This statement takes no
-- host, user or password; those have to be supplied when the client connects.
USE your_database;


Connecting to PostgreSQL Database


In [None]:
-- Connecting to a PostgreSQL database from the psql client
-- Replace 'your_host', 'your_database', 'your_user', 'your_password', and 'your_port' with your PostgreSQL connection details
-- The connection string is quoted so psql reads it as a single conninfo
-- argument rather than as separate positional arguments. \c is a psql
-- meta-command that ends at the line break, so it takes no trailing semicolon.
\c "dbname=your_database user=your_user password=your_password host=your_host port=your_port"


Connecting to Microsoft SQL Server Database


In [None]:
-- Selecting the current database on a Microsoft SQL Server connection
-- Replace 'your_database' with the database name. This statement takes no
-- server, user or password; those have to be supplied when the client connects.
USE your_database;


Connecting to Oracle Database


In [None]:
-- Connecting to an Oracle database
-- Replace 'your_host', 'your_port', 'your_sid', 'your_user', and 'your_password' with your Oracle connection details
-- NOTE(review): the //host:port/name form looks like Easy Connect syntax,
-- whose last component is normally a service name rather than a SID; confirm
-- what 'your_sid' should hold.
CONNECT your_user/your_password@//your_host:your_port/your_sid;


2. Querying Data


In [None]:
-- Every column of every row
SELECT *
FROM your_table;

-- Only the two named columns
SELECT
    column1,
    column2
FROM your_table;


3. Filtering and Sorting


In [None]:
-- Rows where column1 is above 100
SELECT *
FROM your_table
WHERE column1 > 100;

-- All rows, ordered by column1 from smallest to largest
SELECT *
FROM your_table
ORDER BY column1 ASC;


4. Aggregation


In [None]:
-- Mean of column1 across all rows
SELECT
    AVG(column1) AS average_value
FROM your_table;

-- Total number of rows
SELECT
    COUNT(*) AS row_count
FROM your_table;


5. Joining Tables


In [None]:
-- Inner join: only rows whose id appears in both tables
SELECT *
FROM table1
    INNER JOIN table2
        ON table1.id = table2.id;

-- Left join: every row of table1, with table2 columns NULL where no id matches
SELECT *
FROM table1
    LEFT JOIN table2
        ON table1.id = table2.id;


6. Creating a Table


In [None]:
-- Creating a table with three columns
CREATE TABLE your_table (
    column1 INT,           -- integer value
    column2 VARCHAR(255),  -- text of up to 255 characters
    column3 DATE           -- calendar date
);


7. Inserting Data into a Table


In [None]:
-- Add one row, naming the target columns explicitly
INSERT INTO your_table
    (column1, column2, column3)
VALUES
    (1, 'John Doe', '2023-01-01');


8. Updating Data in a Table


In [None]:
-- Change column2 on the rows where column1 equals 1
UPDATE your_table
    SET column2 = 'Jane Doe'
    WHERE column1 = 1;


9. Deleting Data from a Table


In [None]:
-- Remove the rows where column1 equals 1
DELETE
FROM your_table
WHERE column1 = 1;


10. Indexing a Column


In [None]:
-- Creating an index named idx_column1 on column1 of your_table
CREATE INDEX idx_column1 ON your_table (column1);


11. Aggregating Data with GROUP BY


In [None]:
-- Number of rows for each distinct value of column1
SELECT
    column1,
    COUNT(*) AS count
FROM your_table
GROUP BY column1;


12. Subqueries


In [None]:
-- column1 for rows whose column2 is returned by the inner query
SELECT column1
FROM your_table
WHERE column2 IN (
    SELECT column2
    FROM other_table
    WHERE condition
);


13. Case Statements


In [None]:
-- Label each row by the size of column1; the first matching WHEN wins
SELECT
    column1,
    CASE
        WHEN column1 > 10 THEN 'High'
        WHEN column1 > 5 THEN 'Medium'
        ELSE 'Low'
    END AS priority
FROM your_table;


14. Window Functions


In [None]:
-- Using window functions (example: calculating row numbers)
-- Rows are numbered 1, 2, 3, ... in ascending order of column1.
-- The alias is row_num rather than row_number because ROW_NUMBER is a
-- reserved word in MySQL 8, where an unquoted row_number alias is a syntax error.
SELECT column1,
       ROW_NUMBER() OVER (ORDER BY column1) AS row_num
FROM your_table;


# JavaScript

1. Variables and Data Types

In [None]:
// Variables and Data Types
let variableName = 'Hello, World!';    // let: block-scoped, can be reassigned
const constantValue = 42;              // const: block-scoped, cannot be reassigned
var olderWayToDeclareVariable = true;  // var: function-scoped, pre-ES2015 style

// Data types
let numberValue = 123;               // number
let stringValue = 'abc';             // string
let booleanValue = true;             // boolean
let arrayValue = [1, 2, 3];          // array
let objectValue = { key: 'value' };  // object


2. Control Flow - if-else Statement


In [None]:
// Log which branch a boolean flag selects
let condition = true;

if (!condition) {
  console.log('Condition is false');
} else {
  console.log('Condition is true');
}


3. Loops - for and while


In [None]:
// for loop: log the iteration index for 0 through 4
for (let step = 0; step < 5; step += 1) {
  console.log(`Iteration ${step}`);
}

// while loop: log the count for 0 through 2
let counter = 0;
while (counter < 3) {
  console.log(`Count: ${counter}`);
  counter += 1;
}
  

4. Functions


In [None]:
// Functions
/**
 * Build a greeting for the given name.
 * @param name value placed into the greeting text
 * @returns {string} the text "Hello, <name>!"
 */
function greet(name) {
  return 'Hello, '.concat(name, '!');
}
  
  // Call greet with 'John' and log the string it returns
  let greeting = greet('John');
  console.log(greeting);
  

5. Arrays - Map and Filter


In [None]:
// map: build a new array holding the square of each element
let numbers = [1, 2, 3, 4, 5];
let squared = numbers.map((value) => value ** 2);
console.log(squared);

// filter: build a new array holding only the even elements
let evenNumbers = numbers.filter((value) => value % 2 === 0);
console.log(evenNumbers);


6. Objects


In [None]:
// Object literal with three properties
let person = {
  name: 'Alice',
  age: 30,
  profession: 'Engineer',
};

// Read one property with dot notation and log it
console.log(person.name);
  

7. Asynchronous JavaScript - Promises


In [None]:
// Asynchronous JavaScript - Promises
/**
 * Simulate an asynchronous operation.
 * @returns {Promise<string>} promise that resolves with 'Async data' after 2000 ms
 */
function fetchData() {
  return new Promise((resolve) => {
    // setTimeout passes its extra argument to resolve when the timer fires
    setTimeout(resolve, 2000, 'Async data');
  });
}
  
  // Log the resolved value; if the promise rejects, log the error instead
  fetchData()
    .then(result => console.log(result))
    .catch(error => console.error(error));
  

8. JSON Parsing


In [None]:
// JSON Parsing
// JSON.parse turns the JSON text into a JavaScript object
let jsonString = '{"name": "Bob", "age": 25}';
let parsedObject = JSON.parse(jsonString);
console.log(parsedObject.name); // Output: Bob


9. Error Handling with Try...Catch


In [None]:
// Throw an Error and handle it by logging its message
try {
  throw new Error('Something went wrong');
} catch (err) {
  console.error(err.message);
}
  