In [0]:
%sql
-- Create the schema (database) used throughout this notebook.
-- Without IF NOT EXISTS this statement raises an error when avd_db is already
-- present; the next cell shows the re-runnable form.
CREATE DATABASE avd_db;


In [0]:
%sql
-- Idempotent variant: avd_db is created only when it is missing, so running
-- this cell again is a no-op instead of an error.
CREATE DATABASE IF NOT EXISTS avd_db;

In [0]:
%sql 
-- Demonstrates dropping a schema. IF EXISTS turns the statement into a no-op
-- when avd_db is absent. No CASCADE is given, so the default RESTRICT mode
-- applies and the drop is refused while the schema still contains objects.
DROP DATABASE IF EXISTS avd_db;

-- Re-create the schema right away: the cells below (USE avd_db, CREATE TABLE,
-- saveAsTable, ...) all need it to exist when the notebook runs top to bottom.
CREATE DATABASE IF NOT EXISTS avd_db;

In [0]:
%sql
-- Make avd_db the current schema so that unqualified names (such as emp in the
-- next cell) resolve inside it.
USE avd_db;

In [0]:
%sql
-- Create the empty emp table in the current schema (avd_db, selected by the
-- USE statement in the previous cell).
-- IF NOT EXISTS keeps the cell re-runnable: a plain CREATE TABLE fails once
-- the table already exists.
CREATE TABLE IF NOT EXISTS emp (
  id      INT,
  name    VARCHAR(50),
  address VARCHAR(100)
);

In [0]:
%sql
-- Drop the emp table. The name is written as schema.table: wrapping the whole
-- string in ONE pair of backticks (`avd_db.emp`) makes Spark treat it as a
-- single identifier containing a dot, so avd_db.emp itself would never be
-- dropped. If quoting is required, quote each part: `avd_db`.`emp`.
DROP TABLE IF EXISTS avd_db.emp;

-- Re-create the (empty) table so the DESCRIBE cells further down still have
-- avd_db.emp to inspect.
CREATE TABLE IF NOT EXISTS avd_db.emp (
  id      INT,
  name    VARCHAR(50),
  address VARCHAR(100)
);

## DataFrame to Table

In [0]:
# Sample rows as (language_name, count_value) tuples.
data = [
    ("python", 100),
    ("java", 200),
    ("scala", 300),
]

# Build a Spark DataFrame from the list, naming the columns "language" and "count".
df = spark.createDataFrame(data, ["language", "count"])

# Print the rows as a text table.
df.show()

# Persist the DataFrame as the table 'course' inside the 'avd_db' database.
# No path is supplied, so this is a managed table: Databricks manages both the
# data files and the metadata.
# mode("overwrite") replaces an existing avd_db.course; the default save mode
# raises an error when the table already exists, which made this cell fail on
# every run after the first.
df.write.mode("overwrite").saveAsTable("avd_db.course")


+--------+-----+
|language|count|
+--------+-----+
|  python|  100|
|    java|  200|
|   scala|  300|
+--------+-----+



In [0]:
%sql
-- Read back the table written from the DataFrame in the previous cell.
SELECT *
FROM avd_db.course;

language,count
python,100
java,200
scala,300


In [0]:
# Same query as the %sql cell above, issued through the Python API:
# spark.sql() returns a DataFrame and .show() prints it as a text table.
(
    spark
    .sql("select * from avd_db.course")
    .show()
)

+--------+-----+
|language|count|
+--------+-----+
|  python|  100|
|    java|  200|
|   scala|  300|
+--------+-----+



In [0]:
%sql
-- Basic column listing (name, data type, comment) for avd_db.emp.
-- One statement per cell: several statements cannot share a cell without ';'
-- separators. The DESCRIBE DETAIL and DESCRIBE EXTENDED variants are run in
-- their own cells below.
DESC avd_db.emp;

col_name,data_type,comment
id,int,
name,varchar(50),
address,varchar(100),


In [0]:
%sql
-- Table-level details (format, location, creation time, file count, size,
-- table properties, ...) returned as a single row.
DESCRIBE DETAIL avd_db.emp;

format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,a15a1210-0119-412a-a9a1-f824450ca08f,avd_workspace1.avd_db.emp,,abfss://unity-catalog-storage@dbstoragevissqnsxh7qno.dfs.core.windows.net/2337203842480748/__unitystorage/catalogs/4f23fad5-6e40-453f-b6b4-da1ac6b5ab2f/tables/b009f6a3-113d-4f7f-8ad8-215e912582c0,2025-11-28T17:59:48.182Z,2025-11-28T17:59:49Z,List(),List(),0,0,"Map(delta.parquet.compression.codec -> zstd, delta.enableDeletionVectors -> true)",3,7,"List(appendOnly, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


In [0]:
%sql

-- Column listing followed by a "Detailed Table Information" section
-- (catalog, database, table, created time, ...).
DESCRIBE EXTENDED avd_db.emp;

col_name,data_type,comment
id,int,
name,varchar(50),
address,varchar(100),
,,
# Detailed Table Information,,
Catalog,avd_workspace1,
Database,avd_db,
Table,emp,
Created Time,Fri Nov 28 17:59:50 UTC 2025,
Last Access,UNKNOWN,


## We can also create local (session-scoped) or global temp _views_

In [0]:
# === Temporary view (session scoped) ======================================
# Registers df under the name "course_view" for the current notebook/session
# only. The view goes away when the session ends (for example on a cluster
# restart or when the notebook is detached).
df.createOrReplaceTempView("course_view")

# Read the temporary view back through Spark SQL.
(
    spark
    .sql("SELECT * FROM course_view")
    .show()
)


# === Global temporary view (cluster scoped) ===============================
# Registers df as "course_view1" in the special system database 'global_temp',
# which makes it reachable from other notebooks and sessions attached to the
# same cluster.
#
# A global temp view must always be referenced with its qualified name:
# global_temp.<view_name>
df.createOrReplaceGlobalTempView("course_view1")

# Read the global temporary view back using the fully qualified name.
(
    spark
    .sql("SELECT * FROM global_temp.course_view1")
    .show()
)


+--------+-----+
|language|count|
+--------+-----+
|  python|  100|
|    java|  200|
|   scala|  300|
+--------+-----+

+--------+-----+
|language|count|
+--------+-----+
|  python|  100|
|    java|  200|
|   scala|  300|
+--------+-----+



## Table to DataFrame

In [0]:
# Load the avd_db.course table back into a DataFrame through the
# DataFrameReader, then print its rows.
df1 = spark.read.table("avd_db.course")

df1.show()

+--------+-----+
|language|count|
+--------+-----+
|  python|  100|
|    java|  200|
|   scala|  300|
+--------+-----+



In [0]:
%sql
-- =================================================================
-- Query a file in a Volume directly (no table has to be created)
-- =================================================================
-- Databricks SQL can SELECT straight from a file path: there is no need to
-- create a table or load the data first.
--
-- Syntax:
--   SELECT * FROM `file_format`.`full_file_path`
--
-- The file below is a CSV, so the format prefix is `csv`; for a JSON file the
-- prefix would be `json` instead.
--
-- NOTE: as the result below shows, this form reads the file with no header
-- handling: the columns come back as _c0.._c5 and the header line
-- (id,name,salary,...) appears as the first data row.
-- NOTE(review): read_files(..., header => true) is presumably the way to get
-- named columns -- confirm against the Databricks SQL reference.

SELECT *
FROM `csv`.`/Volumes/avd_workspace1/avd_db/raw_data/employees.csv`;


_c0,_c1,_c2,_c3,_c4,_c5
id,name,salary,address,department,joined_date
1,Rahul,50000,Mumbai,Sales,2021-04-10
2,Neha,45k,Pune,HR,2020-08-15
3,Amit,60000,Delhi,IT,2022-01-19
4,John,55k,Bangalore,Marketing,2021-12-01
5,Meera,70000,Hyderabad,Finance,2023-03-25
6,Vikas,abc,Chennai,Sales,2022-07-05
7,Pooja,50000,InvalidAddressOnly,HR,2021-09-14
8,Sachin,,Mumbai,IT,2020-02-20
9,InvalidRowOnly,,,,
