In [1]:
# Simulating Hive database, table, view, function, and index operations using Python & Pandas

import pandas as pd

# Step 1: Build the "table" -- a DataFrame stands in for a Hive table.
# Column order follows the keyword order given here.
data = dict(
    id=[1, 2, 3, 4, 5],
    name=['Alice', 'Bob', 'Cathy', 'David', 'Ella'],
    salary=[50000, 60000, 55000, 70000, 65000],
)
employees = pd.DataFrame(data)
print("Hive Table (Employees):", employees, sep="\n")

# Step 2: Derive the "view" -- only the rows whose salary is strictly above 60000.
# The original row labels are kept, so the view's index is not renumbered.
high_salary_view = employees.loc[employees['salary'] > 60000]
print("\nHive View (Employees with salary > 60000):", high_salary_view, sep="\n")

# Step 3: Define a "function" (UDF simulation)
def bonus(salary, rate=0.10):
    """Return the bonus owed on ``salary``.

    Args:
        salary: Base salary. Anything that supports multiplication by a
            float works (a plain number, or an array-like such as a
            pandas Series).
        rate: Fraction of the salary paid out as bonus. Defaults to 0.10
            (10%), which matches the previously hard-coded behavior.

    Returns:
        The product ``salary * rate``.
    """
    return salary * rate

# Apply the "UDF" to every salary and attach the result as a 'bonus' column.
# `assign` returns a new frame, so the name is rebound rather than the
# already-displayed frame being mutated underneath the reader.
employees = employees.assign(bonus=employees['salary'].apply(bonus))
print("\nAfter Applying Hive Function (Bonus Calculation):")
print(employees)

# Step 4: Create an "index" (simulate indexing by promoting 'id' to the row index).
# Rebinding the returned frame replaces inplace=True; the resulting contents
# of `employees` are the same.
employees = employees.set_index('id')
print("\nHive Index on 'id' column:")
print(employees)


Hive Table (Employees):
   id   name  salary
0   1  Alice   50000
1   2    Bob   60000
2   3  Cathy   55000
3   4  David   70000
4   5   Ella   65000

Hive View (Employees with salary > 60000):
   id   name  salary
3   4  David   70000
4   5   Ella   65000

After Applying Hive Function (Bonus Calculation):
   id   name  salary   bonus
0   1  Alice   50000  5000.0
1   2    Bob   60000  6000.0
2   3  Cathy   55000  5500.0
3   4  David   70000  7000.0
4   5   Ella   65000  6500.0

Hive Index on 'id' column:
     name  salary   bonus
id                       
1   Alice   50000  5000.0
2     Bob   60000  6000.0
3   Cathy   55000  5500.0
4   David   70000  7000.0
5    Ella   65000  6500.0
