In [0]:
# Drop existing silver table if it exists
spark.sql("DROP TABLE IF EXISTS silver_db.jobs_silver")

# Optionally, drop silver database too
# spark.sql("DROP DATABASE IF EXISTS silver_db CASCADE")


DataFrame[]

In [0]:
# Read the bronze table from Delta Lake
bronze_df = spark.read.table("bronze_db.jobs_bronze")


bronze_df.printSchema()
print(f"Bronze Row Count: {bronze_df.count()}")


root
 |-- job_id: long (nullable = true)
 |-- title_raw: string (nullable = true)
 |-- company: string (nullable = true)
 |-- location_raw: string (nullable = true)
 |-- post_date: string (nullable = true)
 |-- url: string (nullable = true)
 |-- type_raw: string (nullable = true)
 |-- levels_raw: string (nullable = true)
 |-- categories_raw: string (nullable = true)
 |-- contents: string (nullable = true)
 |-- _ingest_ts: string (nullable = true)
 |-- _run_label: string (nullable = true)
 |-- _source: string (nullable = true)

Bronze Row Count: 1212


In [0]:
from pyspark.sql.functions import col, count, when

bronze_df.select([
    count(when(col(c).isNull(), c)).alias(c) for c in bronze_df.columns
]).show(vertical=True)


-RECORD 0-------------
 job_id         | 0   
 title_raw      | 0   
 company        | 0   
 location_raw   | 10  
 post_date      | 0   
 url            | 0   
 type_raw       | 212 
 levels_raw     | 0   
 categories_raw | 0   
 contents       | 0   
 _ingest_ts     | 0   
 _run_label     | 0   
 _source        | 0   



In [0]:
display(bronze_df.show())


+--------+--------------------+--------------------+-----------------+----------+--------------------+--------+------------+------------------+--------------------+-------------------+----------+------------+
|  job_id|           title_raw|             company|     location_raw| post_date|                 url|type_raw|  levels_raw|    categories_raw|            contents|         _ingest_ts|_run_label|     _source|
+--------+--------------------+--------------------+-----------------+----------+--------------------+--------+------------+------------------+--------------------+-------------------+----------+------------+
| 3104241|Merchandise and S...|             Walmart|  Albuquerque, NM|2025-08-13|https://www.themu...|external|   Mid Level|Data and Analytics|<p><b>Position Su...|2025-08-27T23:02:58|    manual|the_muse_api|
| 4278354|Senior Data Scien...|            Bluecore|     New York, NY|2024-11-01|https://www.themu...|external|Senior Level|Data and Analytics|<div>Bluecore is ...|

In [0]:


# Step 1: Transform bronze data
from pyspark.sql.functions import trim, col, regexp_replace, when

# Load Bronze
bronze_df = spark.table("bronze_db.jobs_bronze")

# Transform to Silver
silver_df = (
    bronze_df
    .filter(col("location_raw").isNotNull() & col("type_raw").isNotNull())
    .withColumn("location", trim(col("location_raw")))
    
    # Clean type_raw → remove [ ] and ' characters
    .withColumn("type_raw_clean", regexp_replace(col("type_raw"), r"[\[\]']", ""))
    
    # Replace empty strings with null
    .withColumn("type_cleaned", when(trim(col("type_raw_clean")) == "", None).otherwise(trim(col("type_raw_clean"))))
    
    # Final type column
    .withColumn("type", col("type_cleaned"))

    .drop("type_raw_clean", "type_cleaned")  # optional cleanup
)

# Just before saving to silver table
silver_df = silver_df.withColumn("contents", regexp_replace("contents", "<[^>]*>", ""))



# Save to Silver Table
silver_df.write.mode("overwrite").option("mergeSchema", "true").format("delta").saveAsTable("silver_db.jobs_silver")


# Optional preview
silver_df.show(truncate=False)


+--------+---------------------------------------------------------------------------------------------------------------+---------------------------------+-----------------+----------+--------------------------------------------------------------------------------------------------------------------------------------------+--------+------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [0]:
# Check distinct values in type column
silver_df.select("type").distinct().show(truncate=False)



+--------+
|type    |
+--------+
|external|
+--------+



In [0]:
# Inspect raw column to debug
silver_df.select("type_raw").distinct().show(truncate=False)


+--------+
|type_raw|
+--------+
|external|
+--------+



In [0]:
silver_table_name = "jobs_silver"
full_silver_path = f"silver_db.{silver_table_name}"

(
    silver_df
    .write
    .mode("overwrite")        # since we're starting clean
    .format("delta")
    .option("mergeSchema", "true") 
    .saveAsTable(full_silver_path)
)

print(f"✅ Silver table written: {full_silver_path}")

✅ Silver table written: silver_db.jobs_silver


In [0]:
display(spark.sql(f"SELECT * FROM {full_silver_path} LIMIT 10"))
#display(spark.sql("SELECT * FROM bronze_db.jobs_bronze LIMIT 10"))

job_id,title_raw,company,location_raw,post_date,url,type_raw,levels_raw,categories_raw,contents,_ingest_ts,_run_label,_source,location,type
3104241,Merchandise and Stocking Associate,Walmart,"Albuquerque, NM",2025-08-13,https://www.themuse.com/jobs/walmart/merchandise-and-stocking-associate-68d372,external,Mid Level,Data and Analytics,"Position Summary...Do you like to work on your feet and keep things neat and organized? Our merchandising & stocking associates connect all of the dots to make sure members can find everything they have on their shopping list. From every day needs to special occasions, members need you, as their mini tour guide, to take them that special product. Depending on the shift you work, your job could include moving inventory in the backroom, unloading trucks, fulfilling club pick-up orders or helping members while stocking shelves. From unloading trucks in the summer to filling ice cream in the freezer, this fast-paced job can be physically demanding - it's like being paid to go to the gym! Be a part of a great team with a common goal - making sure members can find more of what they love, for less. You will sweep us off our feet if:• You thrive in fast-paced environments • You keep member satisfaction as your top priority• You're comfortable with change and quickly adapt to different work scenarios• You're a problem solver who tackles obstacles head-on to ensure each task is completed with excellence• You are able to pick up boxes and other heavy objects weighing more than 25 poundsYou will make an impact by:• Promptly unloading trucks • Assisting fellow associates as needed throughout the store• Sorting and stocking products on shelves and in the backroom• Engaging with vendors and drivers with a positive attitude• Maintaining a clean, neat, and member-ready areaThe merchandising & stocking associate role is a great way to start a fulfilling career at Sam's Club. Apply now!The above information has been designed to indicate the general nature and level of work performed in the role. It is not designed to contain or be interpreted as a comprehensive inventory of all duties, responsibilities and qualifications required of employees assigned to this job. The full job description can be made available as part of the hiring process.What you'll do...Provides member service by acknowledging the member and identifying member needs; providing guidance and support to members regarding self- service technology; assisting members with purchasing decisions; locating merchandise; resolving member issues and concerns; and promoting the company's products and services.Maintains safety of facility according to company policies and procedures by conducting safety sweeps; following forklift spotting procedures; following procedures for handling and disposing of hazardous materials; following company steel standard guidelines; and correcting and reporting unsafe situations to management.Maintains the sales floor and merchandise presentation in accordance with company policies and procedures by properly zoning the area; stocking, arranging and organizing merchandise; setting up, cleaning, and organizing product displays; removing damaged goods; signing and pricing merchandise according to company policies and procedures; identifying shrink and damages; and securing fragile and high-shrink merchandise.Monitors food and merchandise quality by ensuring product rotation, code dating, product recalls, and sanitation standards are followed according to company policies and procedures; and ensuring prominent display of promotional and seasonal merchandise.Complies with company policies, procedures, and standards of ethics and integrity by implementing related action plans; using the Open Door Policy; and applying these in executing business processes and practices.Completes work assignments and priorities by using policies, data, and resources; collaborating with managers, co-workers, customers, and other business partners; identifying priorities, deadlines, and expectations; carrying out tasks; communicating progress and information; determining and recommending ways to address improvement opportunities; and adapting to and learning from change, difficulties, and feedback.Ensures club pick up orders are filled by assisting with picking club pick up orders throughout the day and staging; and ensuring products are selected and staged according to company policy and procedures.At Sam's Club, we offer competitive pay as well as performance-based bonus awards and other great benefits for a happier mind, body, and wallet! -Health benefits include medical, vision and dental coverage-Financial benefits include 401(k), stock purchase and company-paid life insurance-Paid time off benefits include PTO, parental leave, family care leave, bereavement, jury duty, and voting. You will also receive PTO and/or PPTO that can be used for vacation, sick leave, holidays, or other purposes. The amount you receive depends on your job classification and length of employment. It will meet or exceed the requirements of paid sick leave laws, where applicable. For information about PTO, see https://one.walmart.com/notices.- Other benefits include short-term and long-term disability, company discounts, Military Leave Pay, adoption and surrogacy expense reimbursement, and more. Live Better U is a company paid education benefit program for full-time and part-time associates in Walmart and Sam's Club facilities. Programs range from high school completion to bachelor's degrees, including English Language Learning and short-form certificates. Tuition, books, and fees are completely paid for by Walmart. Eligibility requirements apply to some benefits and may depend on your job classification and length of employment. Benefits are subject to change and may be subject to a specific plan or program terms. For information about benefits and eligibility, see One.Walmart at https://bit.ly/3iOOb1J. The hourly wage range for this position is $16.00 to $23.00The actual hourly rate will equal or exceed the required minimum wage applicable to the job location. Additional compensation in the form of premiums may be paid in amounts ranging from $0.35 per hour to $3.00 per hour in specific circumstances. Premiums may be based on schedule, facility, season, or specific work performed. Multiple premiums may apply if applicable criteria are met.Minimum Qualifications...Outlined below are the required minimum qualifications for this position. If none are listed, there are no minimum qualifications.Preferred Qualifications...Outlined below are the optional preferred qualifications for this position. If none are listed, there are no preferred qualifications.Primary Location...300 EUBANK BLVD NE, ALBUQUERQUE, NM 87123-2755, United States of America",2025-08-27T23:02:58,manual,the_muse_api,"Albuquerque, NM",external
4278354,Senior Data Scientist,Bluecore,"New York, NY",2024-11-01,https://www.themuse.com/jobs/bluecore/senior-data-scientist,external,Senior Level,Data and Analytics,"Bluecore is a marketing technology company that’s reimagining how the world’s fastest growing retail brands transform casual shoppers into lifetime customers. Through our patented retail data model and the recent release of Bluecore Communicate™ and Bluecore Site™, we replace manual processes with an intelligent, AI-driven workflow. We are credited with doubling email revenue, and increasing customer retention for more than 400 brands, including Express, Tommy Hilfiger, The North Face, Teleflora, and Bass Pro Shops. We have been recognized as one of the Best Places to Work by Glassdoor and ranked No. 241 on the Inc. 500 List, the most prestigious ranking of the nation’s fastest-growing private companies. We are looking for Senior Data Scientists with a strong mathematical background to work alongside our Engineering Teams to build the next generation of retail and commerce models that delight and empower marketers. The ideal candidate is one that has several years of experience researching, building, serving, and maintaining data science models at scale. They are able to work with our Product Team to translate product requirements into the correct objectives, perform literature searches to identify the right approach, design thoughtful experiments, and write production code to serve the model and maintain it. They have first-hand experience with what works and what doesn’t, and are eager to share this experience with more junior members and guide them through that process. They are also able to, and excited to, help architect the data science infrastructure needed to accelerate innovation on models and facilitate serving and maintaining them. Finally, they should be curious and eager to identify and explore the myriad of other products that can be built on our unique data asset. Our culture emphasizes making good tradeoffs, working as a team, and leaving your ego at the door. Over the past six years Bluecore has shown that Marketing Teams can create meaningful and valuable experiences for their customers using only first party data; an increasingly important proposition with the rise in concerns around online privacy and third party data. As a Senior Data Scientist, you'll be joining a dedicated group of Data Scientists and Engineers at the forefront of exploring exciting ways to activate that data asset. We build powerful models that enable Marketers to make the right decisions and engage their customers with personalized content that is timely, relevant, and valuable. Our approach to building models is an academic one, starting with a literature search, a baseline, and an iterative process of training and validation to identify the most suitable model that is as simple as possible and as powerful as necessary. We employ a wide variety of models, such as Bayesian models for predicting customer lifetime value, matrix factorization to identify a customer’s product affinity, and reinforcement learning models to optimize content, timing, and frequency of marketing communications. Our models operate at scale and crunch through millions of data points to make decisions that have been shown to double revenue and triple reach, and are designed in a flexible manner to generalize across our set of 400+ diverse customers who span industries from apparel to automotive. To explore, build, deploy, and maintain models we leverage many tools such as BigQuery, Spark, Cloud SQL, Keras, TensorFlow, Airflow, Kubernetes, and Google Compute Engine. Finally, we’re a team that values applied research and constantly exploring the frontier of what’s possible; diving into fields such as topic modeling, restricted Boltzmann machines, recurrent neural networks, convolutional neural networks for image feature extraction, and differential privacy. ResponsibilitiesIdentifying appropriate models/algorithms to solve product requirementsMeticulous experimentation to evaluate and compare modelsWriting internal and external facing documentation describing models and approachesDeploying models to production and maintaining themIdentify new opportunities to leverage our data assetPropose and drive technical initiativesPropose infrastructure to accelerate the pace of model exploration and improve model serving and maintenance QualificationsPhD or MS in a quantitative discipline such as Applied Math, Data Science, Physics, Statistics, or EngineeringRelevant coursework and experience in the fields of Machine Learning, Statistics, and OptimizationDeep understanding of Statistical/Probabilistic Analysis and Linear Algebra3+ years of relevant industry experience, including internshipsAbility to write production-ready codeExperience with Deep Learning, Reinforcement Learning, Natural Language Processing, Time Series Analysis, and/or OptimizationExperience with ML at scaleExperience with SQL Benefits: Highly competitive compensation package including salary and equity as well as the opportunity to work for one of the fastest growing marketing technology companies. - Comprehensive medical, dental, and vision insurance - 401(k) plan - Monthly discretionary reimbursement towards fitness, home office and/or Learning and Development opportunities - Generous Parental Leave & flexible vacation policy We are proud of the culture of flexibility, inclusivity and trust that we have built around our workforce. We are a remote first organization with the option to potentially work in our New York headquarters on occasion moving forward. We love the opportunity to come together – but employees will always have the option on where they work best. At Bluecore we believe in encouraging an inclusive environment in which employees feel encouraged to share their unique perspectives, demonstrate their strengths, and act authentically. We know that diverse teams are strong teams, and welcome those from all backgrounds and varying experiences. Bluecore is a proud equal opportunity employer. We are committed to fair hiring practices and to building a welcoming environment for all team members. All qualified applicants will receive consideration for employment without regard to race, color, religion, gender, gender identity or expression, sexual orientation, national origin, disability, age, familial status or veteran status. We will ensure that individuals with disabilities are provided reasonable accommodation to participate in the job application or interview process, to perform essential job functions, and to receive other benefits and privileges of employment. Please contact us to request accommodation.",2025-08-27T23:02:58,manual,the_muse_api,"New York, NY",external
5049794,"Staff Software Engineer, Data Science Infrastructure",Bluecore,"New York, NY",2021-07-16,https://www.themuse.com/jobs/bluecore/staff-software-engineer-data-science-infrastructure,external,Senior Level,Data and Analytics,"Data Science at Bluecore is a cross-functional effort that focuses on delivering ML products. While we take pride in the models we build and invest heavily in principled research, we understand that models only provide value if they are incorporated into a product that is easy to adopt and provides significant value. To that end we have invested in a Data Science Infrastructure Engineering team to build the orchestration and compute platforms that enable rapid iteration, deployment and monitoring, work with our Data Scientists to scale our models to leverage our large data, and serve the models at scale. We are looking for a Staff Software Engineer to join this Data Science Infrastructure Engineering team to help design and build our next generation ML platform. They will join forces with other cross-functional engineering teams to build scalable, extensible, reliable, and performant systems that provide significant company and industry-level impact. This position requires an influential leader who can actively code while simultaneously helping our team design and implement technical solutions. They will be designing and building services in Python, Go and gRPC running on Kubernetes and the Google Cloud Platform, leveraging BigQuery, Datastore, and BigTable for storage. They will also be working with our Data Scientists to leverage technologies including Beam, Spark, TensorFlow and PyTorch to efficiently leverage and activate our data. Responsibilities Be a key contributor on major projects owned by the Data Science Infrastructure squad, such as: building our next-generation AI platform, building scalable and efficient model serving infrastructure across multiple channels, working with data scientists to optimize and scale their models.Design, architect, and build performant, reliable, high-quality systems at scale within one or more engineering domains.Work cross-functionally with product managers, data scientists, and engineers to deliver high quality products.Evangelize Bluecore Engineering internally and externally, including leading initiatives to promote Bluecore Engineering in the wider community.Proactively identify technology opportunities for the company, and push technical ideas, proposals, and plans to the entire organization and beyond through presentations, technical specs, blog posts, and documentation. Lead company-wide technical strategy and decisions and facilitate progress by building consensus amongst Senior Engineering Leadership and Company Leadership. Advise and advocate for best tools, methods, and approaches for the entire Engineering organization.Function as a sought-out mentor and leader for a technical domain or set of domains across the company. Increase the technical capabilities and upper limits of your own areas of responsibility and expertise, as well as those of your associated team members. Teach sessions, write documentation, and actively mentor to decrease the ramp-up time of new and existing team members. QualificationsSignificant experience working with Data Scientists to develop, build and serve models at scale. Significant experience with leveraging languages and tools like Spark, Beam, TensorFlow, PyTorch to train and serve models. Significant programming expertise with statically-typed languages, preferably Go. Use of performance optimization tools and past experience in a SRE function is a plus.10+ years of relevant professional experience, including recent experience in a similar industry or technology space. Experience building MarTech, AdTech, eCommerce, E-mail, or Big Data platforms and products highly desirable.B.S./B.E in one of the following departments (i) Computer Science, (ii) Computer Engineering, (iii) Information Sciences, (iv) Electronics, (v) Mathematics, or relevant field/equivalent work experience.Experience excelling within a high-growth, startup environment or building out a new team/function within a larger company is preferred.Experience with technical team mentorship, including guiding other engineers to become more effective technical leaders and providing feedback on best practices in code and design. Ability to identify your team's dependencies on other areas and across functions, communicate effectively to remove any immediate blockers, and propose and implement process changes that strengthen and maintain efficiency. Strong communication skills with internal stakeholders on long-term development plans, as well as in articulating short-term issues and delivery. Proven track record of successfully delivering sizable projects that span multiple teams and multiple quarters--or even years!-- that produce significant results and impact overall business success. Benefits: Highly competitive compensation package including salary and equity as well as the opportunity to work for one of the fastest growing marketing technology companies. - Comprehensive medical, dental, and vision insurance - 401(k) plan - Monthly discretionary reimbursement towards fitness, home office and/or Learning and Development opportunities - Generous Parental Leave & flexible vacation policy Bluecore is a marketing technology company that’s reimagining how the world’s fastest growing retail brands transform casual shoppers into lifetime customers. Through our patented retail data model and the recent release of Bluecore Communicate™ and Bluecore Site™, we replace manual processes with an intelligent, AI-driven workflow. We are credited with doubling email revenue, and increasing customer retention for more than 400 brands, including Express, Tommy Hilfiger, The North Face, Teleflora, and Bass Pro Shops. We have been recognized as one of the Best Places to Work by Glassdoor and ranked No. 241 on the Inc. 500 List, the most prestigious ranking of the nation’s fastest-growing private companies. We are proud of the culture of flexibility, inclusivity and trust that we have built around our workforce. We are a remote first organization with the option to potentially work in our New York headquarters on occasion moving forward. We love the opportunity to come together – but employees will always have the option on where they work best. At Bluecore we believe in encouraging an inclusive environment in which employees feel encouraged to share their unique perspectives, demonstrate their strengths, and act authentically. We know that diverse teams are strong teams, and welcome those from all backgrounds and varying experiences. Bluecore is a proud equal opportunity employer. We are committed to fair hiring practices and to building a welcoming environment for all team members. All qualified applicants will receive consideration for employment without regard to race, color, religion, gender, gender identity or expression, sexual orientation, national origin, disability, age, familial status or veteran status. We will ensure that individuals with disabilities are provided reasonable accommodation to participate in the job application or interview process, to perform essential job functions, and to receive other benefits and privileges of employment. Please contact us to request accommodation.",2025-08-27T23:02:58,manual,the_muse_api,"New York, NY",external
6020265,Senior Data Scientist,KeepTruckin,"San Francisco, CA",2024-11-01,https://www.themuse.com/jobs/keeptruckin/senior-data-scientist-26bbf2,external,Senior Level,Data and Analytics,"Who We Are KeepTruckin builds technology using the latest advancements in AI and computer vision to improve the safety and efficiency of businesses with vehicles. Over one million drivers and 100,000 companies across every industry trust KeepTruckin’s integrated platform to power their operations, prevent accidents and reduce costs. At KeepTruckin, we see our hard work rewarded in tangible ways every day, and we believe that intelligence is most powerful when paired with humility. We’re motivated by the opportunity to save lives on the road while impacting and improving every facet of multiple trillion-dollar industries that keep our physical economy moving. KeepTruckin is proud to be a Forbes Cloud 100 company, Forbes Best Startup Employer 2020, and an Inc Magazine Best Place to Work. About the Job: We are looking for a Senior Data Scientist to build the models that power a key new focus area for KeepTruckin. As a member of our team you’ll help frame the problems, build models and products that win customers, and leverage machine learning at a massive scale to solidify KeepTruckin’s technology lead in the connected fleet management space. You’ll embed yourself deep in the world of trucking and help understand the forces that govern it. Working with geospatial, transportation, and economic data, you’ll help optimize the way freight moves throughout North America. Responsibilities: Build, improve, and maintain machine learning models Apply statistical models and quantitative analysis on large datasets to measure and identify areas for improvement/optimization in the product Lead the effort for collection of new data and refinement of existing data sources Identify and help implement tools to analyze this data effectively Educate and steer the organization towards adopting a more data-driven approach Qualifications: Bachelor's degree or higher in a quantitative field, e.g. Computer Science, Math, Economics, or Statistics 4+ years experience in data science, machine learning, and data analysis Expertise in applied probability and statistics Deep understanding of machine learning techniques and algorithms End-to-end deployment data-driven model deployment experience Expertise in data-oriented programming (e.g. SQL) and statistical programming (e.g., Python, R). PySpark experience is a big plus As an equal opportunity employer, we are committed to diversity in the workforce. In accordance with applicable law, we prohibit discrimination against any applicant or employee based on any legally recognized basis, including, but not limited to; race, color, religion, sex (including pregnancy, lactation, childbirth or related medical conditions), sexual orientation, gender identity, age (40 and over), national origin or ancestry, physical or mental disability, genetic information (including testing and characteristics), veteran status, uniformed service member status or any other status protected by federal, state or local law.",2025-08-27T23:02:58,manual,the_muse_api,"San Francisco, CA",external
6641145,Senior Data Engineer,Intapp,"Palo Alto, CA",2024-11-01,https://www.themuse.com/jobs/intapp/senior-data-engineer-27e765,external,Senior Level,Data and Analytics,"The Role: We're hiring a hands-on Senior Data Engineer to be part of Data Solutions organization. The data solutions team builds out Data Pipelines, Data Marts, tools and infrastructure needed to source, validate, clean and process the data to build compelling reports for business leaders. Our team is looking for a Senior Data Engineer to help scale our data efforts. If you have passion for data and want to help build Intapp’s next gen data platform that provides actionable insights to drive client and business outcomes, we’d love to hear from you. You will contribute to the full BI development life cycle, including data ingestion, design, modeling, data migrations, unit testing, performance tuning, deployment activities in an agile environment.  What you’ll do:  Design, develop and document end-to-end pipelines with strong focus on architecture including Data Acquisition, Data Integration, ODS, MDM, Data Warehouse, Data Provisioning, ETL, and BI Partner with business subject matter experts, system analysts, data/application architects and development teams to ensure data design is aligned with business strategy and direction Contribute to build standards around data governance, data security, privacy, data quality and speed of analysis Identify and document best practices, standards and architecture guidelines for the data warehouse and data ecosystem are shared across teams. Dive deep, as required, to assist Data/BI analysts and engineers through technical hurdles impacting delivery. Build data quality and audit framework, data transformations to serve data products that empower data-driven decision making. Understand the analytical objectives to make data engineering and data architecture recommendations and drive informed actions. In order to be successful in this role you need: B.S. degree in computer science, mathematics, statistics or a similar quantitative field 7+ years of proven hands on experience in data engineering, data pipelines, data warehouses (relational, dimensional, columnar, big data), data architecture and metadata management Strong experience with commercial ETL platforms with in-depth knowledge and understanding of ETL methodology & design supporting data transformations transformation layer Knowledge in In-Memory New SQL, NoSQL Experience in Redshift or Snowflake, Spark, Serverless technologies like Athena, Spectrum. Experience with distributed data systems such as Hadoop and related technologies (Spark, Presto, Pig, Hive, etc.) Experience working with databases, APIs for front-end applications Experience in data modeling and reporting tools Proficiency in a major programming language (e.g. Java) and/or a scripting language (Perl/Python) Desire to grow, learn, deliver and a create a strong sense of teamwork It would be amazing if you also have:  Strong knowledge and experience with Agile/Scrum methodology and iterative practices in a service delivery lifecycle. Experience with machine learning, advanced analytics. LI-JS2",2025-08-27T23:02:58,manual,the_muse_api,"Palo Alto, CA",external
7203398,"Senior Machine Learning Engineer - TikTok Search Algorithm (Ranking, Relevance, Understanding, User Engagement)",TikTok,"San Jose, CA",2025-08-03,https://www.themuse.com/jobs/tiktok/senior-machine-learning-engineer-tiktok-search-algorithm-ranking-relevance-understanding-user-engagement,external,Senior Level,Data and Analytics,"Responsibilities About the TeamOn the TikTok Search Team, you will have the opportunity to develop and apply cutting edge machine learning technologies in real-time large-scale systems, which serve billions of search requests every day. Via advanced NLP and multi-modal models, our projects impact and improve the search experience for hundreds of millions of users globally. We embrace a culture of self-direction, intellectual curiosity, openness, and problem-solving.Responsibilities• Optimize the searching quality to provide TikTok's users the best search experience• Combine your understanding of product objectives and take full advantage of modern machine learning and information retrieval techniques to improve search quality and perception• Build the core systems and algorithms development including query understanding, result ranking, query recommendation, system reliability, and etc.• Provide technical leadership to drive search strategy iteration including query understanding, recall & ranking, query recommendation, reliability, etc.• Provide team guidance, regular feedback, education, coaching and mentoring.Qualifications Minimum Qualifications:• Bachelor or advanced degree in computer science or a related technical discipline• Experience as tech lead or leading complex and large projects• Proficient coding skills and strong algorithm & data structure basis• Effective communication and teamwork skills.• Proficiency in the following languages but not limited to: C/C++, Python, Java or GolangPreferred Qualifications:• 5+ years of related industry experience• Experience in one or more of the following areas: NLP, Ranking, Ads, search engine, recommender system, distributed system, and machine learningJob Information[For Pay Transparency] Compensation Description (annually) The base salary range for this position in the selected city is $187040 - $359720 annually.Compensation may vary outside of this range depending on a number of factors, including a candidate's qualifications, skills, competencies and experience, and location. Base pay is one part of the Total Package that is provided to compensate and recognize employees for their work, and this role may be eligible for additional discretionary bonuses/incentives, and restricted stock units.Benefits may vary depending on the nature of employment and the country work location. Employees have day one access to medical, dental, and vision insurance, a 401(k) savings plan with company match, paid parental leave, short-term and long-term disability coverage, life insurance, wellbeing benefits, among others. Employees also receive 10 paid holidays per year, 10 paid sick days per year and 17 days of Paid Personal Time (prorated upon hire with increasing accruals by tenure).The Company reserves the right to modify or change these benefits programs at any time, with or without notice.For Los Angeles County (unincorporated) Candidates:Qualified applicants with arrest or conviction records will be considered for employment in accordance with all federal, state, and local laws including the Los Angeles County Fair Chance Ordinance for Employers and the California Fair Chance Act. Our company believes that criminal history may have a direct, adverse and negative relationship on the following job duties, potentially resulting in the withdrawal of the conditional offer of employment:1. Interacting and occasionally having unsupervised contact with internal/external clients and/or colleagues;2. Appropriately handling and managing confidential information including proprietary and trade secret information and access to information technology systems; and3. Exercising sound judgment.",2025-08-27T23:02:58,manual,the_muse_api,"San Jose, CA",external
7837986,Data Engineer - Data Platform,TikTok,"San Jose, CA",2025-08-03,https://www.themuse.com/jobs/tiktok/data-engineer-data-platform,external,Mid Level,Data and Analytics,"Responsibilities As a data engineer in the data platform team, you will have the opportunity to build, optimize and grow one of the largest data platforms in the world. You'll have the opportunity to gain hands-on experience on all kinds of systems in the data platform ecosystem. Your work will have a direct and huge impact on the company's core products as well as hundreds of millions of users.Responsibilities - What You'll Do• Design and build data transformations efficiently and reliably for different purposes (e.g. reporting, growth analysis, multi-dimensional analysis);• Design and implement reliable, scalable, robust and extensible big data systems that support core products and business;• Establish solid design and best engineering practice for engineers as well as non-technical people.Qualifications Minimum Qualifications:• BS or MS degree in Computer Science or related technical field or equivalent practical experience;• Experience in the Big Data technologies(Hadoop, M/R, Hive, Spark, Metastore, Presto, Flume, Kafka, ClickHouse, Flink etc.);• Experience with performing data analysis, data ingestion and data integration;• Experience with ETL(Extraction, Transformation & Loading) and architecting data systems;• Experience with schema design, data modeling and SQL queries;• Passionate and self-motivated about technologies in the Big Data area.Job Information[For Pay Transparency] Compensation Description (annually) The base salary range for this position in the selected city is $187040 - $438000 annually.Compensation may vary outside of this range depending on a number of factors, including a candidate's qualifications, skills, competencies and experience, and location. Base pay is one part of the Total Package that is provided to compensate and recognize employees for their work, and this role may be eligible for additional discretionary bonuses/incentives, and restricted stock units.Benefits may vary depending on the nature of employment and the country work location. Employees have day one access to medical, dental, and vision insurance, a 401(k) savings plan with company match, paid parental leave, short-term and long-term disability coverage, life insurance, wellbeing benefits, among others. Employees also receive 10 paid holidays per year, 10 paid sick days per year and 17 days of Paid Personal Time (prorated upon hire with increasing accruals by tenure).The Company reserves the right to modify or change these benefits programs at any time, with or without notice.For Los Angeles County (unincorporated) Candidates:Qualified applicants with arrest or conviction records will be considered for employment in accordance with all federal, state, and local laws including the Los Angeles County Fair Chance Ordinance for Employers and the California Fair Chance Act. Our company believes that criminal history may have a direct, adverse and negative relationship on the following job duties, potentially resulting in the withdrawal of the conditional offer of employment:1. Interacting and occasionally having unsupervised contact with internal/external clients and/or colleagues;2. Appropriately handling and managing confidential information including proprietary and trade secret information and access to information technology systems; and3. Exercising sound judgment.",2025-08-27T23:02:58,manual,the_muse_api,"San Jose, CA",external
7971781,Machine Learning Engineer - Trust and Safety Platform,TikTok,Singapore,2025-06-01,https://www.themuse.com/jobs/tiktok/machine-learning-engineer-trust-and-safety-platform,external,Mid Level,Data and Analytics,"Responsibilities Our engineering team is responsible for developing state-of-the-art machine learning models and algorithms to protect our platform and users from the content and behaviors that violate community guidelines and related local regulations. With the continuous efforts from our team, TikTok is able to provide the best user experience and bring joy to everyone in the world.1. Build industry-leading content safety systems for TikTok;2. Develop highly-scalable classifiers, tools, models and algorithms leveraging cutting-edge machine learning, computer vision and data mining technologies;3. Understand product objectives and improve trust and safety strategy and model's performance;4. Work with cross-functional teams to protect TikTok globally.Qualifications Minimum Qualifications1. Major in computer science or a related technical discipline;2. At least 3 years of experience in at least one of the following areas: machine learning, pattern recognition, NLP, data mining, or computer vision. a) Experience in the recommendation, search, advertising, or other related projects.b) Published papers in the top AI conferences or journals is a plus, including KDD, IJCAI, WWW, WSDM, ICML, NeurIPS, CVPR, ECCV, ICCV, ACL, etc.;c) Competition experience in machine learning, data mining, CV, NLP and others.Preferred Qualifications:1. Experience in content moderation or trust & safety is a plus;2. Well understanding of data structures and algorithms;3. Great communication and teamwork skills;4. Passion for techniques and solving challenging problems.",2025-08-27T23:02:58,manual,the_muse_api,Singapore,external
8108963,"Machine Learning Engineer (Recommendation), TikTok e-Commerce",TikTok,Singapore,2025-01-21,https://www.themuse.com/jobs/tiktok/machine-learning-engineer-recommendation-tiktok-ecommerce-d89918,external,Mid Level,Data and Analytics,"Responsibilities TikTok will be prioritizing applicants who have a current right to work in Singapore, and do not require TikTok's sponsorship of a visa.TikTok is the leading destination for short-form mobile video. Our mission is to inspire creativity and bring joy. TikTok has global offices including Los Angeles, New York, London, Paris, Berlin, Dubai, Singapore, Jakarta, Seoul and Tokyo. Why Join UsCreation is the core of TikTok's purpose. Our platform is built to help imaginations thrive. This is doubly true of the teams that make TikTok possible. Together, we inspire creativity and bring joy - a mission we all believe in and aim towards achieving every day. To us, every challenge, no matter how difficult, is an opportunity; to learn, to innovate, and to grow as one team. Status quo? Never. Courage? Always. At TikTok, we create together and grow together. That's how we drive impact - for ourselves, our company, and the communities we serve. Join us.About the teamOur team works on large-scale recommendation systems for various offerings under TikTok and its affiliates, focusing on developing recommendation algorithms/models/strategies. We are committed to developing cutting-edge solutions for e-commerce recommendation systems.Responsibilities- Work on recommendation systems, involving contents of various forms ranging from products, short videos to live streams, with each unified recommendation model fulfilling heterogeneous E-commerce scenarios/goals across multiple countries. - Optimize e-commerce recommendation models at massive scales, using deep learning/transfer learning/multi-task learning techniques.- Data mining and analysis to improve the quality of recommended contents.- Conduct research on various topics, which aim to optimize content recommendation circulation, ranging from ensuring diversity and new discovery in recommendation contents, to cold-start problem for new users/items and discovery of high-quality products/live streamers. - Develop innovative and state-of-the-art e-commerce models and algorithms - Support the production of scalable and optimised AI/machine learning (ML) models - Focus on building algorithms for the extraction, transformation and loading of large volumes of realtime, unstructured data to deploy AI/ML solutions from theoretical data science models- Run experiments to test the performance of deployed models, and identifies and resolves bugs that arise in the process- Work in a team setting and apply knowledge in statistics, scripting and programming languages required by the firm.- Work with the relevant software platforms in which the models are deployedQualifications - Strong in data structures and algorithms, with excellent problem-solving ability and programming skills- Experience in applied machine learning, familiar with one or more of the algorithms such as Collaborative Filtering, Matrix Factorization, Factorization Machines, Word2vec, Logistic Regression, Gradient Boosting Trees, Deep Neural Networks etc. - Experience in working with main components of recommendation systems(recall, sort, reranking, cold-start problem), with good understanding of mainstream recommendation models used in the industry- Experience in C++ and Python; at least one of the Big Data tools (For eg. Hive sql/Spark/Mapreduce; at least one of the Deep Learning tools(For eg. Tensorflow/Pytorch)- Possess strong communication skills, positive mindset, good teamwork skills, and eagerness to learn/implement new technology and experimentPreferred Qualifications- Experience in personalized recommendation, online advertising, information retrieval or related fields.- Publications at KDD, NeurIPS, WWW, SIGIR, WSDM, CIKM, ICLR, ICML, IJCAI, AAAI, RecSys and related conferences- Excellent performance in data mining, machine learning, or ACM-ICPC/NOI/IOI competitions- Developed widely-recognized machine learning project(s) on github or personal webpageTikTok is committed to creating an inclusive space where employees are valued for their skills, experiences, and unique perspectives. Our platform connects people from across the globe and so does our workplace. At TikTok, our mission is to inspire creativity and bring joy. To achieve that goal, we are committed to celebrating our diverse voices and to creating an environment that reflects the many communities we reach. We are passionate about this and hope you are too.",2025-08-27T23:02:58,manual,the_muse_api,Singapore,external
8211784,BI Developer III,SelectQuote,"Overland Park, KS",2022-02-18,https://www.themuse.com/jobs/selectquote/bi-developer-iii-d405fb,external,Senior Level,Data and Analytics,"Founded in 1985, SelectQuote provides solutions that help consumers protect their most valuable assets: their families, health and property. The company pioneered the model of providing unbiased comparisons from multiple, highly rated insurance companies allowing consumers to choose the policy and terms that best meet their unique needs. Two foundational pillars underpin SelectQuote's success: a strong force of highly trained and skilled agents, who provide consultative needs analysis for every consumer, and proprietary technology that sources and routes high-quality leads. The company has three core business lines: SelectQuote Senior, SelectQuote Life and SelectQuote Auto and Home. SelectQuote Senior, the largest and fastest-growing business, serves the needs of a demographic that sees 10,000 people turn 65 each day with a range of Medicare Advantage and Medicare Supplement plans. Job Summary: Responsible for the design, development, and maintenance of reporting artifacts and data visualization dashboards using complex datasets from a variety of sources. Team members should have a strong understanding of a variety of visualization tools and the underlying coding languages to develop within those platforms. Team members will also regularly work with stakeholders across the organization to understand business requirements and translating those into appropriate and best-practice visual story telling elements. Essential Functions: Design and develop reports and dashboards in a highly visual nature based on business requirements Deploy and maintain reporting and dashboarding artifacts in our selected visualization tool (Amazon Quicksight and Microsoft PowerBI) Identify areas of opportunity and improvement for existing reporting elements and communicate those with stakeholders Work with stakeholders to develop business requirements for projects across all divisions Collaborate and mentor peer groups on visualization best practices Work with other members of the data team to articulate ETL, architecture, or data science needs Lead complex data project teams involving multiple stakeholder constituencies to ensure accurate execution emphasizing deliverable business value Knowledge, Skills, and Abilities: 5-7+ years working in a BI, Reporting, or Visualization role with a history of increasing responsibility and capability Intermediate SQL background with the ability to troubleshoot complex queries Intermediate to advanced background with a data visualization tool, with preferred experience in Amazon Quicksight and/or MS PowerBI Comfortable with MS Office suite, particularly MS Excel Familiarity with data language such as R or Python is preferred Strong, detail-oriented technical aptitude that enjoys working with data Ability to manage multiple projects and adjust focus as prioritization shifts Seeks performance and technical feedback as well as comfortable mentoring peers Seeks opportunities for professional development and improvement Excellent communicator with individuals and groups across the organization with the ability to manage stakeholder expectations Benefits:It's an exciting time to join SelectQuote. We became a publicly traded company in 2020 with the first 100% virtual IPO (non-biotech) in American history. We have also been recognized nationally on the 2021 Top Workplaces USA list and by the Kansas City Business Journal as a 2020 Best Places to Work honoree.Full-time employees are eligible for medical, dental, vision, voluntary short-term disability, company-paid long term disability, company-paid life insurance and accidental death & dismemberment (AD&D), 401(k) + company match and 100% vesting after 4 years, discretionary profit sharing, employee stock purchase program (espp), paid time off, floating holidays, paid maternity leave, paid parental bonding leave, tuition reimbursement, jury duty pay, work from home stipend, and other paid leaves vary based on work location.",2025-08-27T23:02:58,manual,the_muse_api,"Overland Park, KS",external
